from __future__ import annotations

import re
import secrets
import threading
import time
from dataclasses import dataclass
from datetime import datetime
from typing import Mapping

import httpx

from govcrawler.settings import get_settings
from govcrawler.storage.attachments import DownloadedAttachment, save_attachment_bytes

# Base URL handed to every httpx client created by this module; request paths
# such as "gc", "showGb", "verifyCode" and "viewGb" are given relative to it.
BASE_URL = "http://c.gb688.cn/bzgk/gb"
# Lifetime of a download session in seconds (10 minutes). It is added to
# time.time() whenever a session is created or its captcha is refreshed.
SESSION_TTL_SEC = 10 * 60


class OpenStdDownloadError(RuntimeError):
    """Base error raised by this module when an OpenSTD download step fails."""


class OpenStdCaptchaInvalid(OpenStdDownloadError):
    """Raised when a submitted captcha is empty or rejected.

    Attributes:
        session_id: Identifier of the session the captcha was submitted for.
    """

    def __init__(self, session_id: str):
        # Record the session id first, then set the fixed error message.
        self.session_id = session_id
        super().__init__("captcha invalid")


@dataclass
class OpenStdDownloadSession:
    """State of one in-progress captcha-protected download.

    Instances are created by ``start_session`` and stored in the module-level
    ``_sessions`` registry until they are closed or expire.
    """

    # Registry key; generated with secrets.token_urlsafe(24) in start_session.
    session_id: str
    # Record identifier, already normalised by _valid_hcno (upper-case hex).
    hcno: str
    # HTTP client reused for every request of this session.
    # NOTE(review): presumably kept so server-side cookies persist — confirm.
    client: httpx.Client
    # Body of the most recently fetched captcha image.
    captcha_bytes: bytes
    # Content-Type reported for the captcha image ("image/jpeg" by default).
    captcha_content_type: str
    # Expiry time in epoch seconds (time.time() + SESSION_TTL_SEC).
    expires_at: float
    # Optional value passed through from the caller of start_session.
    article_id: int | None = None


# Registry of live download sessions, keyed by session id.
_sessions: dict[str, OpenStdDownloadSession] = {}
# Re-entrant lock taken around every read and write of _sessions in this module.
_lock = threading.RLock()


def _headers() -> dict[str, str]:
    """Build the default request headers used by clients of this module.

    Returns:
        A new dict with the configured User-Agent, a browser-like Accept
        value and a fixed Referer.
    """
    user_agent = get_settings().user_agent
    default_headers = {
        "User-Agent": user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Referer": "https://openstd.samr.gov.cn/",
    }
    return default_headers


def _new_client() -> httpx.Client:
    """Create a fresh httpx client bound to ``BASE_URL``.

    Returns:
        A client with the module's default headers, a 30 second timeout and
        redirect following enabled. The caller is responsible for closing it.
    """
    client = httpx.Client(
        base_url=BASE_URL,
        headers=_headers(),
        timeout=30,
        follow_redirects=True,
    )
    return client


def _cleanup(now: float | None = None) -> None:
    now = now or time.time()
    expired: list[str] = []
    with _lock:
        for sid, sess in _sessions.items():
            if sess.expires_at <= now:
                expired.append(sid)
        for sid in expired:
            sess = _sessions.pop(sid, None)
            if sess is not None:
                sess.client.close()


def _valid_hcno(hcno: str) -> str:
    """Normalise and validate an OpenSTD ``hcno`` identifier.

    Surrounding whitespace is stripped and the value is upper-cased. The
    result must consist of 16 to 64 characters from ``0-9`` and ``A-F``.

    Args:
        hcno: Raw identifier; a falsy value is treated as an empty string.

    Returns:
        The normalised identifier.

    Raises:
        ValueError: If the normalised value does not match the expected shape.
    """
    normalized = hcno.strip().upper() if hcno else ""
    if re.fullmatch(r"[A-F0-9]{16,64}", normalized) is None:
        raise ValueError("invalid openstd hcno")
    return normalized


def _refresh_captcha(sess: OpenStdDownloadSession) -> None:
    """Fetch a new captcha image for ``sess`` and extend its lifetime.

    On success the session's captcha bytes, captcha content type and expiry
    time are updated in place.

    Args:
        sess: Session whose client is used for the request.

    Raises:
        httpx.HTTPStatusError: If the captcha endpoint answers with an error
            status.
        OpenStdDownloadError: If the response is not an ``image/*`` payload.
    """
    # The millisecond timestamp makes each captcha URL unique.
    stamp_ms = int(time.time() * 1000)
    resp = sess.client.get(f"gc?_={stamp_ms}")
    resp.raise_for_status()

    content_type = resp.headers.get("content-type")
    looks_like_image = (content_type or "").lower().startswith("image/")
    if not looks_like_image:
        raise OpenStdDownloadError("captcha endpoint did not return an image")

    sess.captcha_bytes = resp.content
    sess.captcha_content_type = content_type or "image/jpeg"
    sess.expires_at = time.time() + SESSION_TTL_SEC


def _warmup_download_page(client: httpx.Client, hcno: str) -> None:
    """Request the download page once, ignoring HTTP-level failures.

    The legacy OpenSTD download host still serves the captcha and verify
    endpoints, but its showGb page may answer 404 or reset the connection for
    some records. Opening a captcha session must not fail before a human has
    had the chance to solve the captcha, so any ``httpx.HTTPError`` raised
    here (including the one raised for a 5xx status) is swallowed.

    Args:
        client: Client of the session being started.
        hcno: Validated record identifier.
    """
    query = {"type": "download", "hcno": hcno, "request_locale": "zh"}
    try:
        response = client.get("showGb", params=query)
        if response.status_code >= 500:
            response.raise_for_status()
    except httpx.HTTPError:
        # Best effort only: the warmup outcome never blocks session creation.
        pass


def start_session(hcno: str, *, article_id: int | None = None) -> OpenStdDownloadSession:
    """Open a new download session and fetch its first captcha.

    Expired sessions are purged first. The new session is registered only
    after the captcha has been fetched successfully; if anything fails before
    that, the freshly created client is closed and the error propagates.

    Args:
        hcno: Record identifier; validated with ``_valid_hcno``.
        article_id: Optional value stored on the session unchanged.

    Returns:
        The registered session, including the captcha image bytes.

    Raises:
        ValueError: If ``hcno`` is not a valid identifier.
        Exception: Whatever the captcha fetch raises is re-raised as-is.
    """
    _cleanup()
    hcno = _valid_hcno(hcno)
    client = _new_client()
    try:
        _warmup_download_page(client, hcno)
        session = OpenStdDownloadSession(
            session_id=secrets.token_urlsafe(24),
            hcno=hcno,
            client=client,
            captcha_bytes=b"",
            captcha_content_type="image/jpeg",
            expires_at=time.time() + SESSION_TTL_SEC,
            article_id=article_id,
        )
        _refresh_captcha(session)
    except Exception:
        # The session was never registered, so nobody else will close it.
        client.close()
        raise
    else:
        with _lock:
            _sessions[session.session_id] = session
        return session


def get_session(session_id: str) -> OpenStdDownloadSession:
    """Look up a live session by id.

    Expired sessions are purged before the lookup.

    Args:
        session_id: Identifier returned by ``start_session``.

    Returns:
        The matching session.

    Raises:
        KeyError: If no session with that id is registered.
    """
    _cleanup()
    with _lock:
        found = _sessions.get(session_id)
    if found is not None:
        return found
    raise KeyError(session_id)


def refresh_captcha(session_id: str) -> OpenStdDownloadSession:
    """Fetch a new captcha image for an existing session.

    Args:
        session_id: Identifier returned by ``start_session``.

    Returns:
        The same session object, updated with the new captcha and expiry.

    Raises:
        KeyError: If the session does not exist.
    """
    session = get_session(session_id)
    _refresh_captcha(session)
    return session


def close_session(session_id: str) -> None:
    """Unregister a session and close its HTTP client.

    Unknown ids are ignored.

    Args:
        session_id: Identifier of the session to discard.
    """
    with _lock:
        removed = _sessions.pop(session_id, None)
    if removed is None:
        return
    # Closed after the lock has been released.
    removed.client.close()


def submit_captcha_and_download(
    session_id: str,
    captcha: str,
    *,
    site: str,
    column: str,
    when: datetime,
    article_key: str,
    fallback_name: str,
) -> DownloadedAttachment:
    """Verify a captcha answer, download the file and store it.

    The session is closed only after the attachment has been saved. On any
    failure it stays registered; when the server rejects the captcha, a new
    captcha image is fetched into the session before the error is raised.

    Args:
        session_id: Identifier returned by ``start_session``.
        captcha: Answer typed by the user; surrounding whitespace is ignored.
        site: Forwarded to ``save_attachment_bytes``.
        column: Forwarded to ``save_attachment_bytes``.
        when: Forwarded to ``save_attachment_bytes``.
        article_key: Forwarded to ``save_attachment_bytes``.
        fallback_name: Forwarded to ``save_attachment_bytes``.

    Returns:
        The value returned by ``save_attachment_bytes``.

    Raises:
        KeyError: If the session does not exist.
        OpenStdCaptchaInvalid: If the answer is empty or rejected.
        OpenStdDownloadError: If the download response has no content.
        httpx.HTTPStatusError: If the verify or download request answers with
            an error status.
    """
    sess = get_session(session_id)

    answer = captcha.strip() if captcha else ""
    if answer == "":
        raise OpenStdCaptchaInvalid(session_id)

    verify_resp = sess.client.post("verifyCode", data={"verifyCode": answer})
    verify_resp.raise_for_status()
    verdict = (verify_resp.text or "").strip().lower()
    if verdict != "success":
        # Give the caller a new image to show before reporting the rejection.
        _refresh_captcha(sess)
        raise OpenStdCaptchaInvalid(session_id)

    download = sess.client.get("viewGb", params={"hcno": sess.hcno})
    download.raise_for_status()
    payload = download.content
    if not payload:
        raise OpenStdDownloadError("download returned empty content")

    response_headers: Mapping[str, str] = download.headers
    saved = save_attachment_bytes(
        payload,
        headers=dict(response_headers),
        fallback_name=fallback_name,
        site=site,
        column=column,
        when=when,
        article_key=article_key,
        source_url=str(download.url),
    )
    close_session(session_id)
    return saved
