"""gkmlpt adapter — unit + real-probe integration.

Three layers:
  1. URL builder is pure + templatable
  2. `parse_list_response` projects real probe JSON without contract violations
  3. bucket-invariant probe catches CMS format drift
"""
from __future__ import annotations

import json
from pathlib import Path

import pytest

from govcrawler.adapters import get_adapter, gkmlpt
from govcrawler.adapters.contract import CrawlItem, Status

# Probe fixtures directory: "data/probe" under the directory one level above
# the one containing this file.
PROBE = Path(__file__).resolve().parents[1].joinpath("data", "probe")


# ---------------------------------------------------------------------------
# registry
# ---------------------------------------------------------------------------
def test_registry_resolves_gkmlpt():
    """Looking up "gkmlpt" must return the very same object imported as ``gkmlpt``."""
    resolved = get_adapter("gkmlpt")
    assert resolved is gkmlpt


def test_registry_rejects_unknown():
    """An unregistered adapter name must raise KeyError mentioning "unknown cms_adapter"."""
    bogus_name = "not_a_real_cms"
    expecting_key_error = pytest.raises(KeyError, match="unknown cms_adapter")
    with expecting_key_error:
        get_adapter(bogus_name)


# ---------------------------------------------------------------------------
# URL builder
# ---------------------------------------------------------------------------
class TestBuildListUrl:
    """Checks on the URLs produced by ``gkmlpt.build_list_url``."""

    def test_default_template(self):
        """With no ``path_tpl`` the built URL must equal the expected gkmlpt list URL."""
        expected = (
            "http://fgw.qingcheng.gov.cn/qycsj/gkmlpt/api/all/1234"
            "?page=2&sid=abc"
        )
        kwargs = {
            "base_url": "http://fgw.qingcheng.gov.cn",
            "dept_path": "qycsj",
            "column_id": "1234",
            "page": 2,
            "sid": "abc",
        }
        assert gkmlpt.build_list_url(**kwargs) == expected

    def test_custom_template(self):
        """Per-site override via adapter_params.list_api_path_tpl.

        The template here already carries a query string (``?fmt=json``); the
        expected URL has the page parameter joined on with ``&``.
        """
        template = "{base_url}/{dept_path}/api/v2/list/{column_id}?fmt=json"
        built = gkmlpt.build_list_url(
            base_url="http://example.gov.cn",
            dept_path="foo",
            column_id="77",
            page=1,
            path_tpl=template,
        )
        expected = "http://example.gov.cn/foo/api/v2/list/77?fmt=json&page=1"
        assert built == expected

    def test_trailing_slash_normalised(self):
        """Slashes around ``base_url``/``dept_path`` must not be doubled in the URL.

        ``page`` is not passed; the expected URL carries ``page=1``.
        """
        built = gkmlpt.build_list_url(
            base_url="http://x/", dept_path="/y/", column_id="9"
        )
        expected = "http://x/y/gkmlpt/api/all/9?page=1"
        assert built == expected


# ---------------------------------------------------------------------------
# list response projection
# ---------------------------------------------------------------------------
# Minimal list payload shared by the parsing tests below: a single article
# under "articles", with an empty-string "document_number".
SAMPLE_MIN = {
    "articles": [
        dict(
            id=2116964,
            title="测试通知",
            url="http://www.qingcheng.gov.cn/qyqcfgw/gkmlpt/content/2/2116/post_2116964.html",
            first_publish_time=1771916570,
            publisher="清远市清城区发展和改革局",
            document_number="",
        )
    ]
}


class TestParseListResponse:
    """Checks on how ``gkmlpt.parse_list_response`` projects list payloads."""

    def test_projects_minimal_article(self):
        """SAMPLE_MIN must yield exactly one CrawlItem with the expected fields."""
        parsed = gkmlpt.parse_list_response(SAMPLE_MIN, site_id="qingcheng_fgw")
        assert len(parsed) == 1

        item = parsed[0]
        assert isinstance(item, CrawlItem)
        assert item.native_post_id == "2116964"
        assert item.status == Status.RAW
        # The publish time must be present and timezone-aware.
        assert item.publish_time is not None
        assert item.publish_time.tzinfo is not None
        assert item.publisher == "清远市清城区发展和改革局"
        # An empty-string document number is expected to come back as None.
        assert item.doc_no is None

    def test_accepts_bare_list(self):
        """A plain list of articles (no wrapping dict) must be accepted as payload."""
        bare_articles = SAMPLE_MIN["articles"]
        parsed = gkmlpt.parse_list_response(bare_articles, site_id="qingcheng_fgw")
        assert len(parsed) == 1

    def test_invalid_payload_type_raises(self):
        """A string payload must raise ContractViolation."""
        from govcrawler.adapters.contract import ContractViolation

        expecting_violation = pytest.raises(ContractViolation)
        with expecting_violation:
            gkmlpt.parse_list_response("bad", site_id="x")  # type: ignore[arg-type]

    def test_skips_malformed_row_without_killing_page(self):
        """Unusable rows must be dropped while the good row on the page survives."""
        empty_url_row = {"id": 1, "url": ""}  # url is empty → expected to be skipped
        valid_row = SAMPLE_MIN["articles"][0]
        noise_row = {"not_a_dict": None}  # carries none of the article fields
        payload = {"articles": [empty_url_row, valid_row, noise_row]}

        parsed = gkmlpt.parse_list_response(payload, site_id="qingcheng_fgw")

        assert len(parsed) == 1
        assert parsed[0].native_post_id == "2116964"

    def test_string_date_fallback(self):
        """An article with only a string ``date`` field must still get a publish time."""
        article = {
            "id": 42,
            "title": "t",
            "url": "http://x/content/0/0/post_42.html",
            "date": "2026-04-01T00:00:00Z",
        }
        parsed = gkmlpt.parse_list_response({"articles": [article]}, site_id="x")
        assert parsed[0].publish_time is not None
        assert parsed[0].publish_time.year == 2026


# ---------------------------------------------------------------------------
# bucket invariant
# ---------------------------------------------------------------------------
class TestBucketInvariant:
    """Checks on ``gkmlpt.verify_bucket_invariant`` for id/url pairs."""

    def test_matches(self):
        """A ``content/2/2116/post_2116964.html`` URL for id 2116964 must pass."""
        article = {
            "id": 2116964,
            "url": "http://x/qycsj/gkmlpt/content/2/2116/post_2116964.html",
        }
        assert gkmlpt.verify_bucket_invariant(article)

    def test_mismatched_bucket_fails(self):
        """A ``content/99/0/`` path for the same id must be rejected."""
        article = {"id": 2116964, "url": "http://x/content/99/0/post_2116964.html"}
        verdict = gkmlpt.verify_bucket_invariant(article)
        assert not verdict

    def test_non_post_url_fails(self):
        """A URL with no ``post_<id>`` segment must be rejected."""
        article = {"id": 2116964, "url": "http://x/other.html"}
        verdict = gkmlpt.verify_bucket_invariant(article)
        assert not verdict


# ---------------------------------------------------------------------------
# real-probe integration — use verify_httpx.json sample articles
# ---------------------------------------------------------------------------
def _load_probe_samples() -> list[dict]:
    """Load ``verify_httpx.json`` from PROBE and keep entries with a sample article.

    Calls ``pytest.skip`` when the file does not exist. Entries whose
    ``sample_article`` value is missing or falsy are left out of the result.
    """
    probe_file = PROBE / "verify_httpx.json"
    if not probe_file.exists():
        pytest.skip(f"probe file missing: {probe_file}")

    reports = json.loads(probe_file.read_text(encoding="utf-8"))
    with_samples = []
    for entry in reports:
        if entry.get("sample_article"):
            with_samples.append(entry)
    return with_samples


@pytest.mark.parametrize(
    "report",
    # Only load the samples when the probe file is present; otherwise the
    # parameter set is empty.
    [] if not (PROBE / "verify_httpx.json").exists() else _load_probe_samples(),
    ids=lambda entry: entry["label"],
)
def test_real_probe_sample_projects_and_passes_bucket_check(report):
    """Each probe sample article must parse to one item and pass the bucket check."""
    parsed = gkmlpt.parse_list_response(
        {"articles": [report["sample_article"]]},
        site_id=report["label"],
    )
    assert len(parsed) == 1

    bucket_ok = gkmlpt.verify_bucket_invariant(report["sample_article"])
    assert bucket_ok
