"""Validator (selector dry-run) tests."""
from pathlib import Path

import pytest

from govcrawler.config import registry
from govcrawler.fetcher.browser import FetchResult
from govcrawler.validator import render_human, validate


# Site config written to ``demo.yaml`` by the ``_site_root`` fixture.
# Declares site ``demo`` with a single column ``c1``; the list and detail
# selectors below are the ones exercised against LIST_HTML / GOOD_DETAIL /
# BAD_DETAIL in the tests.
SITE_YAML = """\
site_id: demo
site_name: demo site
base_url: https://demo.example.com
default_strategy: httpx
enabled: true
respect_robots: true
columns:
  - column_id: c1
    name: c1 list
    list_url: https://demo.example.com/list/
    list_selector:
      row: "ul.news_list li"
      href: "a::attr(href)"
      title: "a::text"
      date: "span.date::text"
    detail:
      title: "h1.article-title::text"
      publish_time: "span.time::text"
      source: ""
      content: "div.article-content"
      attachment_css: "a[href$='.pdf']"
    schedule: "0 2 * * *"
    enabled: true
"""

# List-page fixture: a ``ul.news_list`` holding three ``li`` rows, each with a
# link and a ``span.date`` (the elements the ``list_selector`` in SITE_YAML
# targets).
LIST_HTML = """
<html><body><ul class="news_list">
  <li><a href="/a/post_1.html">标题一</a><span class="date">2026-04-20</span></li>
  <li><a href="/a/post_2.html">标题二</a><span class="date">2026-04-19</span></li>
  <li><a href="/a/post_3.html">标题三</a><span class="date">2026-04-18</span></li>
</ul></body></html>
"""

# Detail-page fixture containing every element the ``detail`` selectors in
# SITE_YAML target: ``h1.article-title``, ``span.time``, a
# ``div.article-content`` body, and one ``.pdf`` link inside that body.
GOOD_DETAIL = (
    '<html><body><h1 class="article-title">测试公告</h1>'
    '<span class="time">2026-04-20 10:00:00</span>'
    '<div class="article-content"><p>正文内容一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十一二三四五六七八九十。</p>'
    '<a href="https://demo.example.com/att/a.pdf">附件.pdf</a></div></body></html>'
)

# Detail-page fixture with none of the elements the ``detail`` selectors in
# SITE_YAML target; used to provoke validator hints.
BAD_DETAIL = "<html><body>no structure</body></html>"


@pytest.fixture(autouse=True)
def _site_root(tmp_path, monkeypatch):
    """Point the registry at a temporary config dir holding only ``demo.yaml``.

    Sets the ``DB_URL`` and ``USER_AGENT`` environment variables, writes
    SITE_YAML into ``<tmp_path>/sites/demo.yaml``, patches
    ``registry.DEFAULT_CONFIG_DIR`` to that directory and calls
    ``registry.reload()``. Yields the directory path.
    """
    env_overrides = (
        ("DB_URL", "postgresql+psycopg://x/x"),
        ("USER_AGENT", "TestBot/1.0"),
    )
    for var_name, var_value in env_overrides:
        monkeypatch.setenv(var_name, var_value)

    sites_dir = tmp_path / "sites"
    sites_dir.mkdir()
    sites_dir.joinpath("demo.yaml").write_text(SITE_YAML, encoding="utf-8")

    monkeypatch.setattr(registry, "DEFAULT_CONFIG_DIR", sites_dir)
    registry.reload()
    yield sites_dir


def _fr(url, html, status=200):
    """Build a canned ``FetchResult`` for *url* carrying *html* and *status*.

    ``final_url`` mirrors ``url``; the remaining fields are fixed values
    (``fetched_at=0.0``, ``duration_ms=12``, ``is_challenge=False``,
    ``strategy="httpx"``).
    """
    fields = {
        "url": url,
        "final_url": url,
        "status": status,
        "html": html,
        "fetched_at": 0.0,
        "duration_ms": 12,
        "is_challenge": False,
        "strategy": "httpx",
    }
    return FetchResult(**fields)


def test_validate_unknown_site_returns_error():
    """An unregistered site id yields ``ok=False`` and a "no YAML" error."""
    report = validate("nope", "x")
    ok_flag, error_text = report["ok"], report["error"]
    assert ok_flag is False
    assert "no YAML" in error_text


def test_validate_list_mode_parses_rows_and_samples():
    """List mode parses all three rows and samples ``max_detail`` detail pages."""

    def serve_pages(url, **kw):
        # The list URL gets the list fixture; anything else is a detail page.
        page = LIST_HTML if url.endswith("/list/") else GOOD_DETAIL
        return _fr(url, page)

    report = validate("demo", "c1", max_detail=2, fetch=serve_pages)
    assert report["ok"] is True
    assert report["list_items_parsed"] == 3
    assert len(report["list_items_preview"]) == 2

    samples = report["detail_samples"]
    assert len(samples) == 2

    sample = samples[0]
    assert sample["title"] == "测试公告"
    assert sample["content_text_length"] > 50
    assert any(link.endswith("a.pdf") for link in sample["attachment_urls"])
    # A fully populated detail page must not raise any warn-level hint.
    assert not any(hint["level"] == "warn" for hint in sample["hints"])


def test_validate_detail_mode_direct_url():
    """Passing ``url=`` validates that page directly, with no ``list_fetch`` key."""
    detail_url = "https://demo.example.com/a/post_x.html"

    def serve_good_detail(url, **kw):
        return _fr(url, GOOD_DETAIL)

    report = validate("demo", "c1", url=detail_url, fetch=serve_good_detail)
    assert report["ok"] is True
    assert "list_fetch" not in report
    assert report["detail"]["title"] == "测试公告"


def test_validate_detail_hints_on_empty_fields():
    """A structureless detail page still validates but produces warn hints."""
    detail_url = "https://demo.example.com/a/post_x.html"

    def serve_bad_detail(url, **kw):
        return _fr(url, BAD_DETAIL)

    report = validate("demo", "c1", url=detail_url, fetch=serve_bad_detail)
    assert report["ok"] is True

    hints = report["detail"]["hints"]
    warn_levels = [hint["level"] for hint in hints if hint["level"] == "warn"]
    messages = [hint["message"] for hint in hints]
    # BAD_DETAIL matches none of the detail selectors; require at least two
    # warn-level hints, one of which must mention the missing title.
    assert len(warn_levels) >= 2
    assert any("title" in message for message in messages)


def test_validate_list_fetch_failure_short_circuits():
    """A list fetch that reports an error makes ``validate`` return that error."""

    def fail_fetch(url, **kw):
        # status 0 with empty html and an error string, as built below.
        failure = {
            "url": url,
            "final_url": url,
            "status": 0,
            "html": "",
            "fetched_at": 0.0,
            "duration_ms": 1,
            "is_challenge": False,
            "error": "ConnectError: boom",
            "strategy": "httpx",
        }
        return FetchResult(**failure)

    report = validate("demo", "c1", fetch=fail_fetch)
    assert report["ok"] is False
    assert "ConnectError" in report["error"]


def test_validate_list_zero_rows_emits_warn_hint():
    """A list page with no matching rows reports zero items and a warn hint."""
    empty_page = "<html><body>nothing here</body></html>"

    def serve_empty(url, **kw):
        # This markup has no ``ul.news_list``, so the row selector matches nothing.
        return _fr(url, empty_page)

    report = validate("demo", "c1", fetch=serve_empty)
    assert report["list_items_parsed"] == 0
    assert any(hint["level"] == "warn" for hint in report["list_hints"])


def test_render_human_output_contains_key_labels():
    """The human rendering of a successful run starts with [OK] and shows key labels."""

    def serve_pages(url, **kw):
        page = LIST_HTML if url.endswith("/list/") else GOOD_DETAIL
        return _fr(url, page)

    report = validate("demo", "c1", max_detail=1, fetch=serve_pages)
    rendered = render_human(report)
    assert rendered.startswith("[OK]")
    # Checked in the same order as listed; the first missing fragment fails.
    for fragment in ("list page", "title", "测试公告", "attachments"):
        assert fragment in rendered


def test_render_human_shows_error_on_failure():
    """A failed report renders with a [FAIL] prefix and the error message."""
    failed_report = {
        "ok": False,
        "error": "bad",
        "site_id": "demo",
        "column_id": "c1",
    }
    rendered = render_human(failed_report)
    assert rendered.startswith("[FAIL]")
    assert "error: bad" in rendered
