import pytest

from govcrawler.fetcher import chain
from govcrawler.fetcher.browser import FetchResult


@pytest.fixture(autouse=True)
def _env(monkeypatch):
    """Set placeholder DB_URL and USER_AGENT variables for every test here.

    The values go through ``monkeypatch`` so they are undone after each test.
    """
    placeholder_settings = (
        ("DB_URL", "postgresql+psycopg://x/x"),
        ("USER_AGENT", "TestBot/1.0"),
    )
    for name, value in placeholder_settings:
        monkeypatch.setenv(name, value)


def _ok_result(strategy="httpx", status=200, html=None):
    """Build a non-challenge ``FetchResult`` for ``https://a.com/x``.

    Args:
        strategy: Value stored in the result's ``strategy`` field.
        status: Value stored in the result's ``status`` field.
        html: Body to return. When ``None``, a filler document made of
            1000 ``x`` characters wrapped in ``<html>`` tags is used.

    Returns:
        A ``FetchResult`` with ``is_challenge=False`` and fixed timing fields.
    """
    if html is None:
        html = "<html>" + "x" * 1000 + "</html>"
    target = "https://a.com/x"
    return FetchResult(
        url=target,
        final_url=target,
        status=status,
        html=html,
        fetched_at=0.0,
        duration_ms=10,
        is_challenge=False,
        strategy=strategy,
    )


def test_tier2_success_no_fallback(monkeypatch):
    """A good httpx result is returned and the browser fetcher is never called."""
    http_hits = []
    browser_hits = []

    def fake_http(url, **kwargs):
        http_hits.append(url)
        return _ok_result()

    def fake_browser(url, **kwargs):
        browser_hits.append(url)
        return _ok_result(strategy="playwright")

    monkeypatch.setattr(chain, "fetch_html_http", fake_http)
    monkeypatch.setattr(chain, "fetch_html_browser", fake_browser)

    result = chain.fetch_html("https://a.com/x")

    assert result.strategy == "httpx"
    assert len(browser_hits) == 0


def test_tier2_412_triggers_fallback(monkeypatch):
    """A 412 challenge from the HTTP fetcher yields the browser fetcher's result."""
    browser_body = "<html>" + "y" * 2000 + "</html>"

    def challenged_http(url, **kwargs):
        # Simulated challenge page: status 412 and is_challenge=True.
        return FetchResult(
            url=url,
            final_url=url,
            status=412,
            html="<html>请稍候</html>",
            fetched_at=0.0,
            duration_ms=5,
            is_challenge=True,
            strategy="httpx",
        )

    def healthy_browser(url, **kwargs):
        return _ok_result(strategy="playwright", html=browser_body)

    monkeypatch.setattr(chain, "fetch_html_http", challenged_http)
    monkeypatch.setattr(chain, "fetch_html_browser", healthy_browser)

    result = chain.fetch_html("https://a.com/x")

    assert result.strategy == "playwright"
    assert result.status == 200


def test_empty_html_triggers_fallback(monkeypatch):
    """A 200 response whose body is only ``<html></html>`` falls back to the browser."""

    def hollow_http(url, **kwargs):
        # Status is fine and no challenge is flagged; only the body is empty.
        return FetchResult(
            url=url,
            final_url=url,
            status=200,
            html="<html></html>",
            fetched_at=0.0,
            duration_ms=5,
            is_challenge=False,
            strategy="httpx",
        )

    def healthy_browser(url, **kwargs):
        return _ok_result(strategy="playwright")

    monkeypatch.setattr(chain, "fetch_html_http", hollow_http)
    monkeypatch.setattr(chain, "fetch_html_browser", healthy_browser)

    result = chain.fetch_html("https://a.com/x")

    assert result.strategy == "playwright"


def test_force_browser_skips_tier2(monkeypatch):
    """With ``force_browser=True`` the HTTP fetcher must not be called at all."""
    http_hits = []

    def fake_http(url, **kwargs):
        http_hits.append(url)
        return _ok_result()

    def fake_browser(url, **kwargs):
        return _ok_result(strategy="playwright")

    monkeypatch.setattr(chain, "fetch_html_http", fake_http)
    monkeypatch.setattr(chain, "fetch_html_browser", fake_browser)

    result = chain.fetch_html("https://a.com/x", force_browser=True)

    assert result.strategy == "playwright"
    assert len(http_hits) == 0


def test_both_tiers_fail_preserves_error(monkeypatch):
    """Application-layer Tier-2 failure (e.g. 412 challenge) WILL escalate
    to Tier 3, and when Tier 3 also fails the error string must keep both
    tiers' messages so logs explain the full failure chain.

    Note: we deliberately use a non-network error here (the HTTP stub
    reports ``HTTPStatusError: 412`` with ``is_challenge=True``).
    The chain v2 contract says network-layer errors (ConnectError, ReadError,
    timeouts) do NOT escalate — Tier 3 can't fix a TCP RST and 30s of
    playwright nav-timeout per article wastes worker time. See
    `_NETWORK_ERROR_PREFIXES` in fetcher/chain.py."""
    # Remove GOVCRAWLER_HOST_ALLOWLIST for this test only. Using monkeypatch
    # (instead of os.environ.pop) restores any pre-existing value at teardown,
    # so the change cannot leak into tests that run afterwards.
    # NOTE(review): the previous comment tied this to a per-host cooldown
    # counter carried over from earlier tests — confirm against fetcher/chain.py.
    monkeypatch.delenv("GOVCRAWLER_HOST_ALLOWLIST", raising=False)

    monkeypatch.setattr(
        chain,
        "fetch_html_http",
        lambda url, **kw: FetchResult(
            url=url, final_url=url, status=412, html="", fetched_at=0.0,
            duration_ms=1, is_challenge=True, error="HTTPStatusError: 412",
            strategy="httpx",
        ),
    )
    monkeypatch.setattr(
        chain,
        "fetch_html_browser",
        lambda url, **kw: FetchResult(
            url=url, final_url=url, status=0, html="", fetched_at=0.0,
            duration_ms=1, is_challenge=False, error="TimeoutError: y",
            strategy="playwright",
        ),
    )

    fr = chain.fetch_html("https://a.com/x")
    # The final result comes from the last tier tried...
    assert fr.strategy == "playwright"
    # ...but its error text must mention both tiers.
    assert "httpx:" in (fr.error or "") and "playwright:" in (fr.error or "")


def test_network_error_does_not_escalate_to_browser(monkeypatch):
    """Per chain v2 contract: TCP-level / DNS / timeout errors stay at
    Tier 2 and are returned as-is. Switching to playwright reproduces
    the same RST against the same host."""
    browser_hits = []

    def refused_http(url, **kwargs):
        # Network-layer failure: no status, no body, ConnectError message.
        return FetchResult(
            url=url,
            final_url=url,
            status=0,
            html="",
            fetched_at=0.0,
            duration_ms=1,
            is_challenge=False,
            error="ConnectError: refused",
            strategy="httpx",
        )

    def recording_browser(url, **kwargs):
        # Would succeed if called; the test asserts it never is.
        browser_hits.append(url)
        return FetchResult(
            url=url,
            final_url=url,
            status=200,
            html="<html>" + "y" * 2000 + "</html>",
            fetched_at=0.0,
            duration_ms=1,
            is_challenge=False,
            strategy="playwright",
        )

    monkeypatch.setattr(chain, "fetch_html_http", refused_http)
    monkeypatch.setattr(chain, "fetch_html_browser", recording_browser)

    result = chain.fetch_html("https://b.com/x")

    assert result.strategy == "httpx"
    assert len(browser_hits) == 0
    assert (result.error or "").startswith("ConnectError")
