"""Tests for ``parse_detail`` driven by the gdqy ``DETAIL_SELECTORS``."""

from govcrawler.parser.detail_parser import parse_detail
from govcrawler.sites.gdqy import DETAIL_SELECTORS

# URL handed to parse_detail alongside the sample page in every test below.
SAMPLE_URL = "https://www.gdqy.gov.cn/x/post_1.html"

# NOTE(review): only the text content of this fixture is visible here; the
# HTML markup (title/meta/content containers and the attachment link whose
# href ends in "notice.pdf") appears to have been stripped and must be
# restored from the original fixture for the selectors to match — confirm.
SAMPLE_HTML = """

关于无人驾驶航空器安全管控的公告

2026-04-10 16:34:22 清远市人民政府

第一段：为保障 2026 年第四届全国轻型飞机锦标赛航空嘉年华活动期间空域安全，现就相关事项公告如下。

第二段：管控时间自 2026 年 5 月 1 日起至 5 月 10 日止。

第三段：管控区域为清远市清城区、清新区以北半径 30 公里内空域。

附件：公告全文.pdf

"""


def test_main_xpath_path():
    """Every field is extracted and no fallback engine is reported."""
    r = parse_detail(SAMPLE_HTML, SAMPLE_URL, DETAIL_SELECTORS)

    assert "无人驾驶航空器安全管控" in r.title
    assert "2026-04-10" in r.publish_time_raw
    assert "清远市人民政府" in r.source
    assert "第一段" in r.content_text
    assert r.used_fallback is False
    assert r.fallback_engine is None


def test_attachment_absolute():
    """The single attachment link is returned as an absolute site URL."""
    r = parse_detail(SAMPLE_HTML, SAMPLE_URL, DETAIL_SELECTORS)

    assert len(r.attachment_urls) == 1
    assert r.attachment_urls[0].startswith("https://www.gdqy.gov.cn/")
    assert r.attachment_urls[0].endswith("notice.pdf")


def test_content_text_no_html_tags():
    """``content_text`` carries no markup but keeps paragraph line breaks."""
    r = parse_detail(SAMPLE_HTML, SAMPLE_URL, DETAIL_SELECTORS)

    # NOTE(review): the tag literal was lost together with the fixture markup;
    # "<p>" is assumed from the test name — confirm against the original.
    assert "<p>" not in r.content_text
    assert "\n" in r.content_text  # paragraph preserved