from govcrawler.parser.list_parser import parse_list

# Listing-page fixture: a ``ul.list_news`` with three ``<li>`` rows. The first
# two rows each carry a relative link and a ``span.date``; the third row holds
# only an HTML comment (no ``<a>``), so it exercises the anchorless-row case.
HTML = """
<html><body>
  <ul class="list_news">
    <li><a href="/x/post_1.html">标题 1</a><span class="date">2026-04-10</span></li>
    <li><a href="/x/post_2.html">标题 2</a><span class="date">2026-04-09</span></li>
    <li><!-- no anchor --></li>
  </ul>
</body></html>
"""

# Selector mapping passed to ``parse_list`` as its third argument.
# NOTE(review): the ``::attr(...)`` / ``::text`` suffixes look like
# parsel/Scrapy-style CSS pseudo-elements -- confirm against the parser.
SEL = {
    "row": "ul.list_news li",  # matches every <li> in the fixture list
    "href": "a::attr(href)",  # link target of the row's anchor
    "title": "a::text",  # anchor text
    "date": "span.date::text",  # text of the row's date span
}


def test_parse_list_extracts_rows():
    """Check the item count and the fields extracted for the first row."""
    base_url = "https://www.gdqy.gov.cn/x/"
    parsed = parse_list(HTML, base_url, SEL)

    # Only the two rows that contain an anchor are expected to be returned.
    assert len(parsed) == 2

    first = parsed[0]
    # The relative href is expected to come back as an absolute URL.
    assert first.url == "https://www.gdqy.gov.cn/x/post_1.html"
    assert first.title == "标题 1"
    assert first.publish_time_raw == "2026-04-10"


def test_parse_list_skips_anchorless_row():
    """Check that the ``<li>`` without an ``<a>`` yields no item.

    The fixture has three rows, one of which has no anchor, so exactly two
    items are expected and each must carry a non-empty URL.
    """
    items = parse_list(HTML, "https://www.gdqy.gov.cn/x/", SEL)
    # all() over an empty list is True, so without this count check the test
    # would pass vacuously if parse_list returned nothing at all.
    assert len(items) == 2
    assert all(it.url for it in items)


def test_parse_list_empty_html():
    """Check that an empty document produces an empty result list."""
    result = parse_list("", "https://x/", SEL)
    assert result == []
