"""Tier 2 fetcher: httpx + real UA. Cheap and fast for sites without JS challenge."""
from __future__ import annotations
import time
from urllib.parse import urlparse

import httpx

from govcrawler.cookies import get_default_store
from govcrawler.fetcher.browser import FetchResult, is_challenge_page
from govcrawler.settings import get_settings

# Default value of fetch_html_http's `timeout_s`, which is handed to
# httpx.Client(timeout=...) unchanged (seconds).
DEFAULT_TIMEOUT = 20.0


def fetch_html_http(url: str, *, timeout_s: float = DEFAULT_TIMEOUT) -> FetchResult:
    """Fetch *url* with a synchronous httpx GET and report the outcome.

    The request carries the configured User-Agent plus browser-like
    ``Accept`` / ``Accept-Language`` headers, follows redirects, and has
    HTTP/2 disabled (the original note cites "PG 16 compat" — TODO confirm
    what that refers to).

    If the cookie pool has an entry for this host (placed there by a previous
    successful playwright pass), those cookies are injected so the WAF lets
    us through on the cheap path.

    Args:
        url: Absolute URL to fetch.
        timeout_s: Timeout passed to ``httpx.Client``.

    Returns:
        A ``FetchResult`` with ``strategy="httpx"``. On success it holds the
        final URL after redirects, the HTTP status, the decoded body and the
        challenge-page verdict. If anything inside the request/parse step
        raises, no exception propagates: the result has ``status=0``, empty
        ``html`` and ``error`` set to ``"<ExceptionType>: <message>"``.
    """
    settings = get_settings()
    headers = {
        "User-Agent": settings.user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
    }
    # The pool is keyed by the lower-cased netloc (which includes any port).
    # NOTE(review): presumably the playwright pass stores under the same key — confirm.
    host = urlparse(url).netloc.lower()
    pool_cookies = get_default_store().get(host) or {}

    # Durations are measured on the monotonic clock so a wall-clock adjustment
    # (NTP step, manual change) can never yield a negative or inflated value.
    # `fetched_at` below intentionally stays on the wall clock.
    started = time.monotonic()

    def elapsed_ms() -> int:
        return int((time.monotonic() - started) * 1000)

    try:
        # NOTE(review): cookies given as a plain mapping carry no domain, so
        # httpx may also send them to redirect targets on other hosts —
        # confirm that is acceptable for the pooled WAF cookies.
        with httpx.Client(
            headers=headers,
            cookies=pool_cookies,
            timeout=timeout_s,
            follow_redirects=True,
            http2=False,
        ) as c:
            r = c.get(url)
            # Media types are case-insensitive, so normalise before matching.
            content_type = r.headers.get("content-type", "").lower()
            # Keep the body for every non-error response, and for error
            # responses only when they are textual (challenge pages typically
            # arrive as 4xx/5xx text/html and must reach is_challenge_page).
            keep_body = "text" in content_type or r.status_code < 400
            html = r.text if keep_body else ""
            challenge = is_challenge_page(r.status_code, html)
            return FetchResult(
                url=url,
                final_url=str(r.url),
                status=r.status_code,
                html=html,
                fetched_at=time.time(),
                duration_ms=elapsed_ms(),
                is_challenge=challenge,
                strategy="httpx",
            )
    except Exception as e:
        # Deliberate catch-all at the fetcher boundary: failures are reported
        # through the result object rather than raised to the caller.
        return FetchResult(
            url=url,
            final_url=url,
            status=0,
            html="",
            fetched_at=time.time(),
            duration_ms=elapsed_ms(),
            is_challenge=False,
            error=f"{type(e).__name__}: {e}",
            strategy="httpx",
        )
