import pytest
from govcrawler.parser import detail_parser
from govcrawler.parser.detail_parser import parse_detail

# NOTE: the HTML used here is similar to SAMPLE_HTML (presumably a fixture in a
# sibling test module -- not visible here), but every detail selector below
# points at a class that does not exist in the page.
BROKEN_SELECTORS = {
    "title": "h1.nonexistent-class::text",
    "publish_time": "span.nonexistent-time::text",
    "source": "span.nonexistent-source::text",
    "content": "div.nonexistent-content",  # intended to force the fallback path
    "attachment_css": "a[href$='.pdf']",
}

# Fixture page: a government announcement made of an <h1> title, a
# "post-meta" line (date + issuing body) and a "post-body" div holding two
# paragraphs. None of its elements carry the classes named in
# BROKEN_SELECTORS, and it contains no links. The body text is well over the
# 50-character minimum that the tests in this module assert on.
RICH_HTML = """
<html><head><title>清远市政府公告</title></head><body>
  <article>
    <h1>关于无人驾驶航空器安全管控的公告</h1>
    <div class="post-meta">2026-04-10 清远市人民政府</div>
    <div class="post-body">
      <p>为保障 2026 年第四届全国轻型飞机锦标赛航空嘉年华活动期间空域安全，现就相关事项公告如下。
      管控时间自 2026 年 5 月 1 日起至 5 月 10 日止。管控区域为清远市清城区、清新区以北半径 30 公里内空域。
      请广大无人机爱好者提前做好飞行计划调整，感谢配合。如有疑问请联系清远市公安局治安支队。</p>
      <p>本公告自发布之日起施行，解释权归清远市人民政府办公室。</p>
    </div>
  </article>
</body></html>
"""


@pytest.mark.network_free
def test_gne_fallback_triggered_when_xpath_fails():
    """Expect a fallback extractor to be used when no selector matches.

    Parses RICH_HTML with BROKEN_SELECTORS and checks that the result
    reports a fallback. Which engine produced the text is not pinned:
    either "gne" or "trafilatura" is accepted.
    """
    page_url = "https://www.gdqy.gov.cn/x/post_x.html"
    parsed = parse_detail(RICH_HTML, page_url, BROKEN_SELECTORS)

    assert parsed.used_fallback is True
    assert parsed.fallback_engine in {"gne", "trafilatura"}
    assert len(parsed.content_text) > 50
    # Loose sanity check: whichever engine ran, the extracted text should
    # contain at least one keyword from the announcement.
    assert any(keyword in parsed.content_text for keyword in ("无人", "公告"))


@pytest.mark.network_free
def test_trafilatura_fallback_when_gne_fails(monkeypatch):
    """Expect trafilatura to be reported when the GNE extractor yields nothing.

    Carries the ``network_free`` marker because it makes the same in-memory
    ``parse_detail`` call as the already-marked GNE fallback test; without
    the marker, a ``pytest -m network_free`` run would silently skip it.

    Args:
        monkeypatch: pytest fixture used to replace
            ``detail_parser.gne_extract`` for the duration of the test.
    """
    # Force gne_extract to return an empty dict so that the trafilatura
    # branch is the one that has to produce the content.
    monkeypatch.setattr(detail_parser, "gne_extract", lambda html, base_url: {})

    result = parse_detail(
        RICH_HTML, "https://www.gdqy.gov.cn/x/post_x.html", BROKEN_SELECTORS
    )

    assert result.used_fallback is True
    assert result.fallback_engine == "trafilatura"
    assert len(result.content_text) > 50


@pytest.mark.network_free
def test_both_fallbacks_empty_returns_short_content(monkeypatch):
    """Expect no fallback to be reported when both extractors return nothing.

    Carries the ``network_free`` marker for consistency with the marked GNE
    fallback test: both extractors are stubbed here, so this test does
    strictly less real work than that one.

    Args:
        monkeypatch: pytest fixture used to replace ``gne_extract`` and
            ``trafilatura_extract`` on ``detail_parser``.
    """

    def _empty_extract(html, base_url):
        # Stand-in for an extractor that found nothing.
        return {}

    for extractor_name in ("gne_extract", "trafilatura_extract"):
        monkeypatch.setattr(detail_parser, extractor_name, _empty_extract)

    result = parse_detail(
        RICH_HTML, "https://www.gdqy.gov.cn/x/post_x.html", BROKEN_SELECTORS
    )

    # Both fallbacks empty -- used_fallback stays False because neither
    # "succeeded", and no engine name is recorded.
    assert result.used_fallback is False
    assert result.fallback_engine is None
    # NOTE(review): the test name mentions "short content", but nothing is
    # asserted about result.content_text -- confirm the intended contract
    # and add an assertion for it.
