"""v2: let patchright sit long enough to clear ctct challenge on city site.

Strategy: goto index, DO NOT trust networkidle (挑战页就是 idle)；
轮询 page.content() 直到 ctct 字样消失 or 真正的 SPA 标识出现，再看 XHR。
"""
from __future__ import annotations

import asyncio
import json
import re
from pathlib import Path

import httpx
from patchright.async_api import async_playwright

OUT = Path(__file__).resolve().parents[1] / "data" / "probe"
UA = ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
      "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36")

TARGET_INDEX = "https://www.gdqy.gov.cn/qyjyj/gkmlpt/index#128"
TARGET_COL = 128
DEPT = "qyjyj"
BASE = "https://www.gdqy.gov.cn"


async def main():
    rep: dict = {"target": TARGET_INDEX}
    async with async_playwright() as p:
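        # disabling the AutomationControlled blink feature keeps
        # navigator.webdriver false -- the first thing most bot checks probe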
        br = await p.chromium.launch(headless=True,
                                     args=["--disable-blink-features=AutomationControlled"])
        ctx = await br.new_context(user_agent=UA, locale="zh-CN")
        page = await ctx.new_page()
        apis: list[str] = []
        page.on("response", lambda r: (
            apis.append(f"{r.status} {r.url}") if "/gkmlpt/" in r.url else None
        ))
        # don't wait for networkidle — challenge page IS idle
        await page.goto(TARGET_INDEX, wait_until="domcontentloaded", timeout=45000)
        # poll up to 20s for challenge clearing (ctct gone from DOM)
        for i in range(20):
            await page.wait_for_timeout(1000)
            html = await page.content()
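            # challenge markers: a form field name="ctct" plus the Chinese
            # "please wait" text (请稍候)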
            if 'name="ctct"' not in html and "请稍候" not in html:
                rep["cleared_after_sec"] = i + 1
                break
        else:
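            # for/else: reached only if the loop above never hit `break`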
            rep["cleared_after_sec"] = None  # never cleared

        # give SPA a beat to fire its XHRs
        await page.wait_for_timeout(3000)
        rep["apis_seen"] = apis
        rep["final_title"] = await page.title()
        cookies = await ctx.cookies()
        rep["cookies"] = [{"n": c["name"], "v_len": len(c.get("value","")),
                           "domain": c.get("domain")} for c in cookies]
        await br.close()

    # try to replay any sniffed api/all endpoint via httpx, reusing the browser cookies
    sid_url = next((u for u in (a.split(" ",1)[-1] for a in rep["apis_seen"])
                    if "/gkmlpt/api/all/" in u), None)
    rep["api_sid_url"] = sid_url
    if sid_url and cookies:
        jar = httpx.Cookies()
        for c in cookies:
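            # some cookies (e.g. host-only or oddly scoped ones) may not map
            # cleanly onto httpx's jar; skip those rather than abort the replay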
            try:
                jar.set(c["name"], c["value"], domain=c.get("domain",""), path=c.get("path","/"))
            except Exception:
                pass
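        # verify=False: assumption that the host's TLS chain may not validate
        # locally (not uncommon on .gov.cn sites); drop it if verification works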
        with httpx.Client(verify=False, follow_redirects=True, timeout=20.0, cookies=jar) as cli:
            r = cli.get(sid_url, headers={"User-Agent": UA, "Referer": TARGET_INDEX,
                                          "Accept": "application/json"})
        rep["replay_status"] = r.status_code
        rep["replay_ct"] = r.headers.get("content-type","")
        rep["replay_head"] = r.text[:300]
        try:
            rep["replay_articles"] = len(r.json().get("articles") or [])
        except Exception as e:
            rep["replay_json_err"] = str(e)

    OUT.mkdir(parents=True, exist_ok=True)
    (OUT / "verify_city_v2.json").write_text(json.dumps(rep, ensure_ascii=False, indent=2),
                                              encoding="utf-8")
    print(json.dumps(rep, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    asyncio.run(main())
