"""Auto-fallback fetcher chain: httpx (Tier 2) → playwright (Tier 3).

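Per design doc §4.1: default Tier 2; on 412/403/empty/challenge → Tier 3.
This module also falls back on fetch errors, 429, any 5xx status, and bodies
shorter than MIN_HTML_CHARS.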
Tier 4 (DrissionPage) not yet wired — add when Tier 3 also fails in production.
"""
from __future__ import annotations

import logging
from urllib.parse import urlparse

from govcrawler.cookies import get_default_store
from govcrawler.fetcher.browser import FetchResult, fetch_html as fetch_html_browser
from govcrawler.fetcher.http_client import fetch_html_http

# Minimum length of a Tier 2 response body. Anything shorter is assumed to be
# a JS-rendered shell, so the fetch falls back to the browser tier.
MIN_HTML_CHARS = 500


def _should_fallback(fr: FetchResult) -> bool:
    """Decide whether a Tier 2 result is unusable and Tier 3 should be tried.

    Returns True when any of the following holds, checked in this order:
    the result carries an error, the status is 403/412/429 or any 5xx, the
    result is flagged as a challenge page, or the HTML is missing or shorter
    than `MIN_HTML_CHARS`. Returns False otherwise.
    """
    blocked_statuses = (403, 412, 429)
    # `or` short-circuits, so later attributes are only inspected when every
    # earlier check came back falsy.
    return bool(
        fr.error
        or fr.status in blocked_statuses
        or fr.status >= 500
        or fr.is_challenge
        or not fr.html
        or len(fr.html) < MIN_HTML_CHARS
    )


def fetch_html(url: str, *, force_browser: bool = False) -> FetchResult:
    """Try Tier 2 (httpx) first; fall back to Tier 3 (playwright) on failure.

    Set `force_browser=True` to skip Tier 2 entirely (e.g. known-challenge domains).

    Returns the Tier 2 result when `_should_fallback` accepts it. Otherwise the
    cookie-store entry for the URL's host is invalidated (best-effort) and the
    Tier 3 result is returned. When both tiers report an error, the returned
    result's `error` is rewritten to carry both messages.
    """
    if force_browser:
        return fetch_html_browser(url)

    fr = fetch_html_http(url)
    if not _should_fallback(fr):
        return fr

    # FETCH-04: Tier 2 failed with injected cookies → they're stale/invalid.
    # Drop the pool entry so playwright re-primes from a clean slate.
    try:
        host = urlparse(url).netloc.lower()
        get_default_store().invalidate(host)
    except Exception:
        # Invalidation is best-effort and must never block the fallback, but a
        # failure here means stale cookies may linger — log it, don't hide it.
        logging.getLogger(__name__).warning(
            "cookie invalidation failed for %s; continuing with browser fallback",
            url,
            exc_info=True,
        )

    # Tier 3 fallback; preserve original Tier 2 failure info in error if browser also fails
    fr2 = fetch_html_browser(url)
    if fr2.error and fr.error:
        fr2.error = f"httpx:{fr.error} | playwright:{fr2.error}"
    return fr2
