"""Tests for ``parse_list`` from ``govcrawler.parser.list_parser``."""
from govcrawler.parser.list_parser import parse_list

# Listing-page fixture.
# NOTE(review): the markup of this fixture was lost (only the text nodes
# "标题 1 / 2026-04-10" and "标题 2 / 2026-04-09" survived). It has been
# reconstructed from the selectors in SEL and from the values asserted in
# the tests below -- confirm against the real page layout.
# NOTE(review): the trailing empty <li> is assumed from the test name
# ``test_parse_list_skips_anchorless_row``; without a row lacking an <a>
# that test would be vacuous. Verify this matches the intended fixture.
HTML = """
<ul class="list_news">
  <li><a href="post_1.html">标题 1</a><span class="date">2026-04-10</span></li>
  <li><a href="post_2.html">标题 2</a><span class="date">2026-04-09</span></li>
  <li class="empty"></li>
</ul>
"""

# Selector mapping handed to parse_list: one selector for the row container
# and one per extracted field, each evaluated relative to a row.
SEL = {
    "row": "ul.list_news li",
    "href": "a::attr(href)",
    "title": "a::text",
    "date": "span.date::text",
}


def test_parse_list_extracts_rows():
    """Two linked rows are returned, with URL, title and raw date populated."""
    items = parse_list(HTML, "https://www.gdqy.gov.cn/x/", SEL)

    assert len(items) == 2
    # The relative href is expected to be resolved against the base URL.
    assert items[0].url == "https://www.gdqy.gov.cn/x/post_1.html"
    assert items[0].title == "标题 1"
    assert items[0].publish_time_raw == "2026-04-10"


def test_parse_list_skips_anchorless_row():
    """Every returned item carries a non-empty URL."""
    items = parse_list(HTML, "https://www.gdqy.gov.cn/x/", SEL)

    assert all(it.url for it in items)


def test_parse_list_empty_html():
    """An empty document yields an empty list."""
    assert parse_list("", "https://x/", SEL) == []