"""Admin dashboard + API tests — v2 schema aligned.

Changes from 1.0:
  * Toggles now flip `enabled` on DB rows (crawl_site / crawl_target), not YAML
  * `/admin/api/sites/{site_id}/{column_id}/run` → `/admin/api/targets/{target_code}/run`
  * Retry now calls v2 `pipeline.fetch_and_store(target_code=..., url=...)`
  * Article/log fixtures built with int FKs via _v2fixtures helpers
"""
from datetime import datetime, timedelta
from pathlib import PurePosixPath
from types import SimpleNamespace

import pytest
from fastapi.testclient import TestClient
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from govcrawler import api as api_pkg
from govcrawler.api.admin import _session as real_session
from govcrawler.models import (
    Article,
    ArticleStandardMeta,
    Attachment,
    Base,
    CrawlJob,
    CrawlLog,
    CrawlSite,
    CrawlTarget,
    LocalDepartment,
    MasterColumnRegistry,
    SiteDepartment,
)
from govcrawler.storage.attachments import DownloadedAttachment

from tests._v2fixtures import make_site_and_target


@pytest.fixture
def db(tmp_path, monkeypatch):
    monkeypatch.setenv("DB_URL", "sqlite:///" + str(tmp_path / "admin.db"))
    monkeypatch.setenv("DATA_DIR", str(tmp_path / "data"))
    monkeypatch.setenv("USER_AGENT", "TestBot/1.0")

    engine = create_engine("sqlite:///" + str(tmp_path / "admin.db"), future=True)
    Base.metadata.create_all(engine)
    SM = sessionmaker(bind=engine, expire_on_commit=False)

    def _override():
        with SM() as s:
            yield s

    api_pkg.app.dependency_overrides[real_session] = _override

    with SM() as s:
        site, target = make_site_and_target(s, site_code="demo", column_id="c1")
        site_pk, target_id = site.id, target.id
        target_code = target.target_code

        now = datetime.utcnow()
        s.add(Article(
            id=1, site_id=site_pk, target_id=target_id,
            url="https://demo/1", url_hash="a" * 64,
            title="广东省人民政府文件", status="ready",
            content_text="正文关键词出现在这里", has_attachment=False,
            fetch_strategy="httpx", fetched_at=now - timedelta(hours=1),
            publish_time=now - timedelta(hours=2),
            source_raw="本网",
            channel_name="政策文件",
            channel_path="/zwgk/wjk",
            content_category="法规文件",
            content_subcategory="通知",
            index_no="006939748/2026-00001",
            doc_no="粤府函〔2026〕1号",
            publisher="广东省人民政府",
            publish_date=(now - timedelta(days=1)).date(),
            effective_date=now.date(),
            is_effective=True,
            topic_words="政务公开",
            open_category="法规文件/通知",
        ))
        s.add(Article(
            id=2, site_id=site_pk, target_id=target_id,
            url="https://demo/2", url_hash="b" * 64,
            title="政策解读", status="failed",
            content_text="短", has_attachment=False, fetch_strategy="playwright",
            fetched_at=now - timedelta(hours=3),
        ))
        s.add(CrawlLog(
            id=1, site_id=site_pk, target_id=target_id,
            article_url="https://demo/1",
            strategy="httpx", http_status=200, duration_ms=120, success=True,
            occurred_at=now - timedelta(minutes=30),
        ))
        s.add(CrawlLog(
            id=2, site_id=site_pk, target_id=target_id,
            article_url="https://demo/x",
            strategy="playwright", http_status=412, duration_ms=500, success=False,
            error_msg="ctct_challenge_unresolved",
            occurred_at=now - timedelta(minutes=10),
        ))
        s.add(CrawlJob(
            job_id="full_done_1",
            host="demo",
            site_code="demo",
            target_code=target_code,
            source="manual",
            status="done",
            force=True,
            enqueued_at=now - timedelta(minutes=20),
            started_at=now - timedelta(minutes=19),
            finished_at=now - timedelta(minutes=15),
            result_json={
                "status": "ok",
                "items_seen": 12,
                "items_new": 3,
                "items_skipped": 9,
                "items_failed": 0,
            },
        ))
        s.add(CrawlJob(
            job_id="inc_failed_1",
            host="demo",
            site_code="demo",
            target_code=target_code,
            source="manual",
            status="failed",
            force=False,
            enqueued_at=now - timedelta(minutes=5),
            started_at=now - timedelta(minutes=4),
            finished_at=now - timedelta(minutes=3),
            error_msg="ConnectError: boom",
        ))
        s.add(CrawlJob(
            job_id="full_aborted_1",
            host="demo",
            site_code="demo",
            target_code=target_code,
            source="manual",
            status="done",
            force=True,
            enqueued_at=now - timedelta(minutes=2),
            started_at=now - timedelta(minutes=2),
            finished_at=now - timedelta(minutes=1),
            result_json={"status": "aborted", "items_seen": 4},
        ))
        s.commit()

    yield {"SM": SM, "target_code": target_code, "site_code": "demo"}
    api_pkg.app.dependency_overrides.clear()


def _c():
    return TestClient(api_pkg.app)


# ---------- dashboard HTML ----------

def test_dashboard_serves_html(db, tmp_path, monkeypatch):
    # Replace the dashboard template with a stub so the test doesn't depend
    # on the real HTML asset being shipped in the repo.
    from govcrawler.api import admin as admin_mod
    stub = tmp_path / "admin_stub.html"
    stub.write_text("<html><body>GovCrawler 管理后台</body></html>", encoding="utf-8")
    monkeypatch.setattr(admin_mod, "DASHBOARD_HTML", stub)
    r = _c().get("/admin/")
    assert r.status_code == 200
    assert "text/html" in r.headers["content-type"]
    assert "GovCrawler 管理后台" in r.text


# ---------- sites ----------

def test_api_sites_lists_demo(db):
    body = _c().get("/admin/api/sites").json()
    codes = [s["site_code"] for s in body["sites"]]
    assert "demo" in codes
    demo = next(s for s in body["sites"] if s["site_code"] == "demo")
    assert demo["enabled"] is True
    target = next(t for t in demo["targets"] if t["target_code"] == db["target_code"])
    assert target["has_completed_full_crawl"] is True
    assert target["last_full_crawl_job"]["job_id"] == "full_done_1"
    assert target["last_full_crawl_job"]["items_seen"] == 12


def test_target_crawl_jobs_lists_history(db):
    body = _c().get(f"/admin/api/targets/{db['target_code']}/crawl-jobs").json()
    assert body["count"] == 3
    assert [j["job_id"] for j in body["jobs"]] == [
        "full_aborted_1",
        "inc_failed_1",
        "full_done_1",
    ]

    full_only = _c().get(
        f"/admin/api/targets/{db['target_code']}/crawl-jobs?force=true"
    ).json()
    assert full_only["count"] == 2
    assert full_only["jobs"][0]["force"] is True
    assert full_only["jobs"][0]["result_status"] == "aborted"
    assert full_only["jobs"][1]["items_seen"] == 12


def test_article_business_filters_and_payload(db):
    r = _c().get(
        "/admin/api/articles/search",
        params={
            "doc_no": "粤府函",
            "publisher": "广东省人民政府",
            "content_category": "法规",
            "is_effective": "true",
            "has_attachment": "false",
            "fetch_strategy": "httpx",
        },
    )
    assert r.status_code == 200
    body = r.json()
    assert body["total"] == 1
    item = body["items"][0]
    assert item["id"] == 1
    assert item["site_id"] is not None
    assert item["target_id"] is not None
    assert item["url_hash"] == "a" * 64
    assert item["doc_no"] == "粤府函〔2026〕1号"
    assert item["publisher"] == "广东省人民政府"
    assert item["content_text_len"] > 0
    assert item["created_at"] is not None

    ids = _c().get(
        "/admin/api/articles/search/ids",
        params={"index_no": "006939748", "open_category": "通知"},
    ).json()
    assert ids["ids"] == [1]


def test_article_detail_exposes_all_business_attributes(db):
    body = _c().get("/admin/api/articles/1").json()
    assert body["native_post_id"] is None
    assert body["url_hash"] == "a" * 64
    assert body["source_raw"] == "本网"
    assert body["content_category"] == "法规文件"
    assert body["content_text_len"] == len("正文关键词出现在这里")
    assert body["created_at"] is not None


def test_toggle_target_flips_db_row(db):
    r = _c().post(f"/admin/api/targets/{db['target_code']}/toggle?enabled=false")
    assert r.status_code == 200
    assert r.json()["enabled"] is False

    # verify DB state flipped
    with db["SM"]() as s:
        t = s.query(CrawlTarget).filter_by(target_code=db["target_code"]).one()
        assert t.enabled is False


def test_toggle_site_flips_db_row(db):
    r = _c().post("/admin/api/sites/demo/toggle?enabled=false")
    assert r.status_code == 200
    with db["SM"]() as s:
        site = s.query(CrawlSite).filter_by(site_code="demo").one()
        assert site.enabled is False


def test_toggle_site_unknown_returns_404(db):
    r = _c().post("/admin/api/sites/does_not_exist/toggle?enabled=false")
    assert r.status_code == 404


def test_toggle_target_unknown_returns_404(db):
    r = _c().post("/admin/api/targets/nope/toggle?enabled=false")
    assert r.status_code == 404


def test_create_and_update_site(db):
    payload = {
        "site_code": "demo2",
        "site_name": "演示站二",
        "base_url": "https://demo2.example.com",
        "site_role": "county_local",
        "yaml_path": "config/sites/demo2.yaml",
        "enabled": True,
        "respect_robots": True,
    }
    r = _c().post("/admin/api/sites", json=payload)
    assert r.status_code == 200
    assert r.json()["site"]["site_code"] == "demo2"

    r2 = _c().put("/admin/api/sites/demo2", json={"site_name": "演示站二-改", "enabled": False})
    assert r2.status_code == 200
    assert r2.json()["site"]["site_name"] == "演示站二-改"
    assert r2.json()["site"]["enabled"] is False


def test_create_and_update_target(db):
    with db["SM"]() as s:
        s.add(LocalDepartment(dept_id=888, dept_name="发改局", full_name="示例发改局"))
        s.commit()

    payload = {
        "site_code": "demo",
        "target_code": "demo__policy",
        "target_name": "政策文件",
        "entry_url": "https://demo.example.com/policy/",
        "dept_id": 888,
        "channel_name": "政策文件",
        "channel_path": "信息公开｜政策文件",
        "content_category": "政策文件",
        "expected_cadence_days": 7,
        "interval_sec": 3,
        "interval_jitter_sec": 9,
        "track_checkpoint": True,
        "enabled": True,
    }
    r = _c().post("/admin/api/targets", json=payload)
    assert r.status_code == 200
    assert r.json()["target"]["target_code"] == "demo__policy"
    assert r.json()["target"]["dept_id"] == 888
    assert r.json()["target"]["interval_jitter_sec"] == 9
    assert r.json()["target"]["track_checkpoint"] is True

    r2 = _c().put(
        "/admin/api/targets/demo__policy",
        json={
            "target_name": "政策文件-更新",
            "enabled": False,
            "track_checkpoint": False,
            "content_subcategory": "部门文件",
        },
    )
    assert r2.status_code == 200
    assert r2.json()["target"]["target_name"] == "政策文件-更新"
    assert r2.json()["target"]["enabled"] is False
    assert r2.json()["target"]["track_checkpoint"] is False
    assert r2.json()["target"]["content_subcategory"] == "部门文件"


def test_run_target_queues_background_task(db, monkeypatch):
    called = {"n": 0, "args": None}

    def fake_crawl(target_code, *, max_items=None, stop_on_duplicate=True, **kw):
        called["n"] += 1
        called["args"] = (target_code, max_items)
        called["stop_on_duplicate"] = stop_on_duplicate
        return {"status": "ok", "items_new": 0}

    monkeypatch.setattr("govcrawler.pipeline.crawl_target", fake_crawl)
    r = _c().post(f"/admin/api/targets/{db['target_code']}/run")
    assert r.status_code == 200
    assert r.json()["queued"] is True
    assert called["n"] == 1
    assert called["args"] == (db["target_code"], None)
    # default = no force → pipeline gets stop_on_duplicate=True (incremental)
    assert called["stop_on_duplicate"] is True

    # ?force=true is accepted and reflected in the response. Validating that
    # it actually propagates to crawl_target through the async queue worker
    # is flaky here because each _c() creates its own event loop, so we only
    # verify the endpoint contract; the checkpoint-resume tests further down
    # assert the queue kwargs directly via a FakeQueue.
    r = _c().post(f"/admin/api/targets/{db['target_code']}/run?force=true")
    assert r.status_code == 200
    assert r.json()["force"] is True


def test_guangdong_gkmlpt_sites_share_rate_queue(db):
    from govcrawler.api import task_queue

    task_queue._HOST_CACHE.clear()
    with db["SM"]() as s:
        s.add_all([
            CrawlSite(
                site_code="gd_finance",
                site_name="广东省财政厅",
                base_url="https://czt.gd.gov.cn",
                cms_adapter="gkmlpt",
                managed_by="ui",
                enabled=True,
            ),
            CrawlSite(
                site_code="gd_transport",
                site_name="广东省交通运输厅",
                base_url="https://td.gd.gov.cn",
                cms_adapter="gkmlpt",
                managed_by="ui",
                enabled=True,
            ),
            CrawlSite(
                site_code="qingcheng_fgw",
                site_name="清城区发改局",
                base_url="http://fgw.qingcheng.gov.cn",
                cms_adapter="gkmlpt",
                managed_by="ui",
                enabled=True,
            ),
        ])
        s.commit()

    assert task_queue._rate_key_for_site("gd_finance") == "gd_gkmlpt_shared"
    assert task_queue._rate_key_for_site("gd_transport") == "gd_gkmlpt_shared"
    assert task_queue._rate_key_for_site("qingcheng_fgw") == "fgw.qingcheng.gov.cn"


def test_run_target_can_resume_from_latest_checkpoint(db, monkeypatch):
    with db["SM"]() as s:
        t = s.query(CrawlTarget).filter_by(target_code=db["target_code"]).one()
        t.track_checkpoint = True
        now = datetime.utcnow()
        s.add_all([
            CrawlJob(
                job_id="old_checkpoint",
                host="demo.example.com",
                site_code=db["site_code"],
                target_code=db["target_code"],
                source="manual",
                status="cancelled",
                force=True,
                stop_requested=False,
                last_completed_page=7,
                enqueued_at=now - timedelta(days=2),
                finished_at=now - timedelta(days=2),
            ),
            CrawlJob(
                job_id="new_checkpoint",
                host="demo.example.com",
                site_code=db["site_code"],
                target_code=db["target_code"],
                source="manual",
                status="failed",
                force=True,
                stop_requested=False,
                last_completed_page=12,
                enqueued_at=now - timedelta(hours=1),
                finished_at=now - timedelta(hours=1),
            ),
            CrawlJob(
                job_id="newer_incremental_checkpoint",
                host="demo.example.com",
                site_code=db["site_code"],
                target_code=db["target_code"],
                source="schedule",
                status="done",
                force=False,
                stop_requested=False,
                last_completed_page=1,
                enqueued_at=now,
                finished_at=now,
            ),
        ])
        s.commit()

    class FakeQueue:
        def __init__(self):
            self.kw = None

        async def submit(self, **kw):
            self.kw = kw
            return "resume_job"

    fake = FakeQueue()
    monkeypatch.setattr("govcrawler.api.task_queue.get_queue", lambda: fake)

    r = _c().post(
        f"/admin/api/targets/{db['target_code']}/run"
        "?force=true&resume_from_latest_checkpoint=true"
    )

    assert r.status_code == 200
    assert r.json()["job_id"] == "resume_job"
    assert r.json()["resume_from_latest_checkpoint"] is True
    assert r.json()["resume_from_page"] == 12
    assert fake.kw["force"] is True
    assert fake.kw["resume_from_page"] == 12


def test_run_target_resume_uses_highest_checkpoint_page(db, monkeypatch):
    with db["SM"]() as s:
        t = s.query(CrawlTarget).filter_by(target_code=db["target_code"]).one()
        t.track_checkpoint = True
        now = datetime.utcnow()
        s.add_all([
            CrawlJob(
                job_id="older_farther_checkpoint",
                host="demo.example.com",
                site_code=db["site_code"],
                target_code=db["target_code"],
                source="manual",
                status="cancelled",
                force=True,
                stop_requested=False,
                last_completed_page=80,
                enqueued_at=now - timedelta(days=3),
                finished_at=now - timedelta(days=3),
            ),
            CrawlJob(
                job_id="newer_short_checkpoint",
                host="demo.example.com",
                site_code=db["site_code"],
                target_code=db["target_code"],
                source="manual",
                status="cancelled",
                force=True,
                stop_requested=False,
                last_completed_page=20,
                enqueued_at=now,
                finished_at=now,
            ),
        ])
        s.commit()

    class FakeQueue:
        def __init__(self):
            self.kw = None

        async def submit(self, **kw):
            self.kw = kw
            return "resume_job"

    fake = FakeQueue()
    monkeypatch.setattr("govcrawler.api.task_queue.get_queue", lambda: fake)

    r = _c().post(
        f"/admin/api/targets/{db['target_code']}/run"
        "?force=true&resume_from_latest_checkpoint=true"
    )

    assert r.status_code == 200
    assert r.json()["resume_from_page"] == 80
    assert fake.kw["resume_from_page"] == 80


def test_run_target_resume_requires_saved_checkpoint(db, monkeypatch):
    with db["SM"]() as s:
        t = s.query(CrawlTarget).filter_by(target_code=db["target_code"]).one()
        t.track_checkpoint = True
        s.commit()

    class FakeQueue:
        async def submit(self, **_kw):
            raise AssertionError("resume without checkpoint should not enqueue")

    monkeypatch.setattr("govcrawler.api.task_queue.get_queue", lambda: FakeQueue())

    r = _c().post(
        f"/admin/api/targets/{db['target_code']}/run"
        "?force=true&resume_from_latest_checkpoint=true"
    )

    assert r.status_code == 409
    assert "no saved checkpoint" in r.text


# ---------- logs ----------

def test_logs_list_newest_first(db):
    body = _c().get("/admin/api/logs?limit=50").json()
    assert body["count"] == 2
    ids = [r["id"] for r in body["items"]]
    assert ids == sorted(ids, reverse=True)
    # joined codes surface correctly
    assert body["items"][0]["site_code"] == "demo"
    assert body["items"][0]["target_code"] == db["target_code"]


def test_logs_filter_only_failed(db):
    body = _c().get("/admin/api/logs?only_failed=true").json()
    assert body["count"] == 1
    assert body["items"][0]["success"] is False


def test_logs_filter_by_target_code(db):
    body = _c().get(f"/admin/api/logs?target={db['target_code']}").json()
    assert body["count"] == 2
    body2 = _c().get("/admin/api/logs?target=nope").json()
    assert body2["count"] == 0


def test_log_retry_queues_fetch(db, monkeypatch):
    called = {"n": 0, "args": None}

    def fake_fetch(*, target_code, url):
        called["n"] += 1
        called["args"] = (target_code, url)
        return {"status": "ready"}

    monkeypatch.setattr("govcrawler.pipeline.fetch_and_store", fake_fetch)

    r = _c().post("/admin/api/logs/2/retry")
    assert r.status_code == 200
    assert called["n"] == 1
    assert called["args"] == (db["target_code"], "https://demo/x")


def test_log_retry_404(db):
    r = _c().post("/admin/api/logs/99999/retry")
    assert r.status_code == 404


# ---------- articles search ----------

def test_articles_search_by_keyword(db):
    body = _c().get("/admin/api/articles/search?q=关键词").json()
    assert body["count"] == 1
    assert body["items"][0]["id"] == 1


def test_articles_search_filter_status(db):
    body = _c().get("/admin/api/articles/search?status=failed").json()
    assert body["count"] == 1
    assert body["items"][0]["id"] == 2


def test_articles_search_filter_by_target(db):
    body = _c().get(f"/admin/api/articles/search?target={db['target_code']}").json()
    assert body["count"] == 2


def test_articles_search_includes_rag_export_state(db):
    with db["SM"]() as s:
        a = s.get(Article, 1)
        a.rag_export_status = "failed"
        a.rag_export_error = "embedding failed"
        a.rag_export_task_ids = ["task-1"]
        s.commit()

    body = _c().get("/admin/api/articles/search").json()
    row = next(x for x in body["items"] if x["id"] == 1)
    assert row["rag_export_status"] == "failed"
    assert row["rag_export_error"] == "embedding failed"
    assert row["rag_export_task_ids"] == ["task-1"]


def test_rag_retry_queues_unexported_and_failed_ready_articles(db, monkeypatch):
    called = {"ids": None}

    def fake_retry(article_ids):
        called["ids"] = article_ids

    monkeypatch.setattr(
        "govcrawler.api.admin.articles.retry_failed_articles_to_rag",
        fake_retry,
    )
    with db["SM"]() as s:
        a1 = s.get(Article, 1)
        a1.rag_export_status = None
        a2 = s.get(Article, 2)
        a2.rag_export_status = "failed"
        site = s.query(CrawlSite).filter_by(site_code="demo").one()
        target = s.query(CrawlTarget).filter_by(target_code=db["target_code"]).one()
        s.add(Article(
            id=3,
            site_id=site.id,
            target_id=target.id,
            url="https://demo/3",
            url_hash="c" * 64,
            title="ready failed rag",
            status="ready",
            content_text="ready failed rag",
            has_attachment=False,
            fetch_strategy="httpx",
            fetched_at=datetime.utcnow(),
            rag_export_status="failed",
        ))
        s.commit()

    r = _c().post("/admin/api/articles/rag-retry", json={"article_ids": [1, 2, 3, 999]})
    assert r.status_code == 200
    body = r.json()
    assert body["queued_count"] == 2
    assert body["article_ids"] == [1, 3]
    assert called["ids"] == [1, 3]
    skipped = {x["article_id"]: x["reason"] for x in body["skipped"]}
    assert skipped[2] == "article_not_ready"
    assert skipped[999] == "not_found"

    with db["SM"]() as s:
        a1 = s.get(Article, 1)
        assert a1.rag_export_status == "pending"
        assert a1.rag_export_error is None
        assert a1.rag_export_task_ids == []


def test_rag_import_all_queues_global_pending_and_failed_ready_articles(db, monkeypatch):
    called = {"ids": None}

    def fake_import_all(article_ids):
        called["ids"] = article_ids

    monkeypatch.setattr(
        "govcrawler.api.admin.articles.import_all_pending_articles_to_rag",
        fake_import_all,
    )
    with db["SM"]() as s:
        site = s.query(CrawlSite).filter_by(site_code="demo").one()
        target = s.query(CrawlTarget).filter_by(target_code=db["target_code"]).one()
        a1 = s.get(Article, 1)
        a1.rag_export_status = None
        s.add(Article(
            id=3,
            site_id=site.id,
            target_id=target.id,
            url="https://demo/3",
            url_hash="c" * 64,
            title="ready failed rag",
            status="ready",
            content_text="ready failed rag",
            has_attachment=False,
            fetch_strategy="httpx",
            fetched_at=datetime.utcnow(),
            rag_export_status="failed",
        ))
        s.commit()

    r = _c().post("/admin/api/articles/rag-import-all")
    assert r.status_code == 200
    body = r.json()
    assert body["queued"] is True
    assert body["eligible_count"] == 2
    assert called["ids"] == [1, 3]


# ---------- stats ----------

def test_stats_summary(db):
    body = _c().get("/admin/api/stats").json()
    assert body["total_articles"] == 2
    assert body["ready_articles"] == 1
    assert body["unexported_to_rag"] == 1
    assert body["fetch_attempts_24h"] == 2
    assert abs(body["success_rate_24h"] - 0.5) < 1e-6
    # v2 adds site/target counts
    assert body["sites"] == {"total": 1, "enabled": 1}
    assert body["targets"] == {"total": 1, "enabled": 1}


# ---------- attachments ----------

def test_attachments_list_and_filter(db):
    with db["SM"]() as s:
        s.add(Attachment(
            article_id=1, file_name="budget_q1.pdf", file_ext="pdf",
            size_bytes=123456, file_path="/data/a/budget.pdf",
            file_hash="f" * 64,
        ))
        s.add(Attachment(
            article_id=1, file_name="notes.docx", file_ext="docx",
            size_bytes=2048, file_path="/data/a/notes.docx",
        ))
        s.commit()

    body = _c().get("/admin/api/attachments").json()
    assert body["count"] == 2
    names = {x["file_name"] for x in body["items"]}
    assert names == {"budget_q1.pdf", "notes.docx"}
    # joins bring site/target codes through
    assert all(x["site_code"] == "demo" and x["target_code"] == db["target_code"]
               for x in body["items"])

    # ext filter
    pdf_only = _c().get("/admin/api/attachments?ext=pdf").json()
    assert pdf_only["count"] == 1
    assert pdf_only["items"][0]["file_name"] == "budget_q1.pdf"

    # q filter (filename substring)
    q_only = _c().get("/admin/api/attachments?q=budget").json()
    assert q_only["count"] == 1

    # target filter mismatch → empty
    assert _c().get("/admin/api/attachments?target=nope").json()["count"] == 0


# ---------- alerts ----------

def test_alerts_live_runs_rules(db, monkeypatch):
    """run_checks is called against the test session; we stub it with a fake."""
    from govcrawler.alerting import checks as checks_mod
    from govcrawler.alerting.checks import AlertRule

    def fake_run(*, now=None, session=None):
        assert session is not None  # admin always passes the request session
        return [
            AlertRule(code="R1_SUCCESS_RATE", site_code="demo",
                      target_code=db["target_code"], message="[R1] test"),
            AlertRule(code="R2_BLOCK_RATE", site_code="demo",
                      target_code=None, message="[R2] test"),
        ]

    monkeypatch.setattr(checks_mod, "run_checks", fake_run)

    body = _c().get("/admin/api/alerts").json()
    assert body["count"] == 2
    assert body["by_code"] == {"R1_SUCCESS_RATE": 1, "R2_BLOCK_RATE": 1}
    codes = [x["code"] for x in body["items"]]
    assert "R1_SUCCESS_RATE" in codes and "R2_BLOCK_RATE" in codes


def test_alerts_empty_when_healthy(db):
    # Our fixture has 2 logs — below R1_MIN_SAMPLES=5 — so no alerts fire
    body = _c().get("/admin/api/alerts").json()
    assert body["count"] == 0
    assert body["items"] == []


# ---------- departments ----------

def test_departments_lists_mapped_and_pending(db):
    with db["SM"]() as s:
        site_id = s.query(CrawlSite).filter_by(site_code="demo").one().id
        s.add(LocalDepartment(dept_id=777, dept_name="财政局", full_name="示例区财政局"))
        s.add(SiteDepartment(
            site_id=site_id, dept_path="czj",
            dept_binding="mapped", local_dept_id=777,
            dept_display_name="财政局",
        ))
        s.add(SiteDepartment(
            site_id=site_id, dept_path="unknown",
            dept_binding="pending",
            dept_display_name="待映射",
        ))
        s.commit()

    body = _c().get("/admin/api/departments?site=demo").json()
    assert body["count"] == 2
    by_path = {d["dept_path"]: d for d in body["items"]}
    assert by_path["czj"]["dept_binding"] == "mapped"
    assert by_path["czj"]["local_dept_id"] == 777
    assert by_path["czj"]["local_dept_name"] == "财政局"
    assert by_path["unknown"]["dept_binding"] == "pending"
    assert by_path["unknown"]["local_dept_id"] is None

    # binding filter
    only_pending = _c().get("/admin/api/departments?binding=pending").json()
    assert only_pending["count"] == 1
    assert only_pending["items"][0]["dept_path"] == "unknown"

    # q filter
    hits = _c().get("/admin/api/departments?q=czj").json()
    assert hits["count"] == 1


def test_local_departments_lookup(db):
    with db["SM"]() as s:
        s.add(LocalDepartment(dept_id=999, dept_name="教育局", full_name="示例教育局"))
        s.commit()
    body = _c().get("/admin/api/local-departments?q=教育").json()
    assert body["count"] == 1
    assert body["items"][0]["dept_id"] == 999


def test_column_registry_listing(db):
    with db["SM"]() as s:
        site = s.query(CrawlSite).filter_by(site_code="demo").one()
        target = s.query(CrawlTarget).filter_by(target_code=db["target_code"]).one()
        s.add(MasterColumnRegistry(
            adapter_id="gkmlpt",
            site_id=site.id,
            column_id="2849",
            column_name="规章文件",
            column_path="信息公开｜政策文件｜规章文件",
            topic_tags=["规章文件", "政策文件"],
            post_count=42,
            subscribed_target_id=target.id,
            active=True,
        ))
        s.commit()
    body = _c().get("/admin/api/column-registry?adapter=gkmlpt&subscribed=yes").json()
    assert body["count"] == 1
    assert body["items"][0]["column_id"] == "2849"
    assert body["items"][0]["target_code"] == db["target_code"]


def test_subscriptions_listing(db):
    with db["SM"]() as s:
        site = s.query(CrawlSite).filter_by(site_code="demo").one()
        target = s.query(CrawlTarget).filter_by(target_code=db["target_code"]).one()
        s.add(MasterColumnRegistry(
            adapter_id="gkmlpt",
            site_id=site.id,
            column_id="5074",
            column_name="规划计划",
            column_path="信息公开｜规划计划",
            subscribed_target_id=target.id,
            active=True,
        ))
        s.commit()
    body = _c().get("/admin/api/subscriptions").json()
    assert body["count"] == 1
    assert body["items"][0]["target_code"] == db["target_code"]
    assert body["items"][0]["column_id"] == "5074"


def test_registry_subscribe_creates_target_and_links_row(db):
    with db["SM"]() as s:
        site = s.query(CrawlSite).filter_by(site_code="demo").one()
        s.add(MasterColumnRegistry(
            adapter_id="gkmlpt",
            site_id=site.id,
            column_id="8120",
            column_name="通知公告",
            column_path="信息公开｜通知公告",
            active=True,
        ))
        s.commit()
        registry_id = s.query(MasterColumnRegistry).filter_by(column_id="8120").one().id

    payload = {
        "entry_url": "https://demo.example.com/gkmlpt/index#8120",
        "content_category": "通知公告",
    }
    r = _c().post(f"/admin/api/column-registry/{registry_id}/subscribe", json=payload)
    assert r.status_code == 200
    assert r.json()["subscription"]["column_id"] == "8120"

    with db["SM"]() as s:
        mcr = s.get(MasterColumnRegistry, registry_id)
        assert mcr is not None
        assert mcr.subscribed_target_id is not None
        t = s.get(CrawlTarget, mcr.subscribed_target_id)
        assert t is not None
        assert t.target_code == "demo__8120"
        assert t.entry_url == "https://demo.example.com/gkmlpt/index#8120"
        assert t.content_category == "通知公告"


def test_registry_batch_preview_and_subscribe(db):
    with db["SM"]() as s:
        site = s.query(CrawlSite).filter_by(site_code="demo").one()
        s.add_all([
            MasterColumnRegistry(
                adapter_id="gkmlpt",
                site_id=site.id,
                column_id="3860",
                column_name="规划计划",
                column_path="信息公开｜规划计划",
                active=True,
            ),
            MasterColumnRegistry(
                adapter_id="gkmlpt",
                site_id=site.id,
                column_id="2849",
                column_name="规章文件",
                column_path="信息公开｜政策文件｜规章文件",
                active=True,
            ),
        ])
        s.commit()
        ids = [
            row.id
            for row in s.query(MasterColumnRegistry)
            .filter(MasterColumnRegistry.column_id.in_(["3860", "2849"]))
            .order_by(MasterColumnRegistry.column_id)
        ]

    preview = _c().post(
        "/admin/api/column-registry/batch-preview",
        json={
            "registry_ids": ids,
            "entry_url_template": "{base_url}/gkmlpt/index#{column_id}",
            "content_category": "政策文件",
        },
    )
    assert preview.status_code == 200
    body = preview.json()
    assert body["count"] == 2
    assert all(item["ready"] is True for item in body["items"])
    assert {item["target_code"] for item in body["items"]} == {"demo__2849", "demo__3860"}

    r = _c().post(
        "/admin/api/column-registry/batch-subscribe",
        json={
            "registry_ids": ids,
            "entry_url_template": "{base_url}/gkmlpt/index#{column_id}",
            "content_category": "政策文件",
        },
    )
    assert r.status_code == 200
    assert r.json()["count"] == 2

    with db["SM"]() as s:
        rows = s.query(MasterColumnRegistry).filter(MasterColumnRegistry.id.in_(ids)).all()
        assert all(row.subscribed_target_id is not None for row in rows)
        codes = {
            t.target_code
            for t in s.query(CrawlTarget)
            .filter(CrawlTarget.target_code.in_(["demo__2849", "demo__3860"]))
        }
        assert codes == {"demo__2849", "demo__3860"}


def test_create_and_update_department_binding(db):
    with db["SM"]() as s:
        s.add(LocalDepartment(dept_id=1001, dept_name="人社局", full_name="示例人社局"))
        s.commit()

    payload = {
        "site_code": "demo",
        "dept_path": "rsj",
        "dept_binding": "mapped",
        "local_dept_id": 1001,
        "dept_display_name": "人社局",
        "enabled": True,
    }
    r = _c().post("/admin/api/departments", json=payload)
    assert r.status_code == 200
    dept_id = r.json()["department"]["id"]
    assert r.json()["department"]["dept_path"] == "rsj"

    r2 = _c().put(f"/admin/api/departments/{dept_id}", json={"dept_binding": "external_ref", "local_dept_id": None})
    assert r2.status_code == 200
    assert r2.json()["department"]["dept_binding"] == "external_ref"
    assert r2.json()["department"]["local_dept_id"] is None


def test_department_health_marks_stale_targets(db):
    with db["SM"]() as s:
        site = s.query(CrawlSite).filter_by(site_code="demo").one()
        target = s.query(CrawlTarget).filter_by(target_code=db["target_code"]).one()
        s.add(LocalDepartment(dept_id=1002, dept_name="教育局", full_name="示例教育局"))
        dept = SiteDepartment(
            site_id=site.id,
            dept_path="jyj",
            dept_binding="mapped",
            local_dept_id=1002,
            dept_display_name="教育局",
        )
        s.add(dept)
        s.flush()
        target.site_department_id = dept.id
        target.dept_id = 1002
        target.expected_cadence_days = 1
        target.last_crawled_at = datetime.utcnow() - timedelta(days=2)
        target.last_article_time = datetime.utcnow() - timedelta(days=5)
        s.commit()
        dept_id = dept.id

    body = _c().get(f"/admin/api/departments/{dept_id}/health").json()
    assert body["department"]["dept_path"] == "jyj"
    assert body["department"]["target_count"] == 1
    assert body["department"]["stale_count"] == 1
    assert body["items"][0]["target_code"] == db["target_code"]
    assert body["items"][0]["stale"] is True


def test_target_dry_run_basic_mode(db):
    body = _c().post(f"/admin/api/targets/{db['target_code']}/dry-run", json={}).json()
    assert body["target_code"] == db["target_code"]
    assert body["mode"] in {"basic", "validator"}
    assert isinstance(body["checks"], list)


# ---------- parser override ----------

def test_parser_get_empty_then_put_and_roundtrip(db):
    code = db["target_code"]
    r = _c().get(f"/admin/api/targets/{code}/parser").json()
    assert r["target_code"] == code
    assert r["parser_override"] == {}

    payload = {"detail": {"title": "h1.t", "content": "div.c"}}
    r2 = _c().put(f"/admin/api/targets/{code}/parser", json=payload)
    assert r2.status_code == 200
    assert r2.json()["parser_override"] == payload

    # reload fresh
    again = _c().get(f"/admin/api/targets/{code}/parser").json()
    assert again["parser_override"] == payload


def test_parser_put_empty_clears(db):
    code = db["target_code"]
    _c().put(f"/admin/api/targets/{code}/parser", json={"detail": {"x": "y"}})
    r = _c().put(f"/admin/api/targets/{code}/parser", json={})
    assert r.status_code == 200
    assert r.json()["parser_override"] == {}
    with db["SM"]() as s:
        t = s.query(CrawlTarget).filter_by(target_code=code).one()
        assert t.parser_override_json is None


def test_parser_put_rejects_bad_shape(db):
    code = db["target_code"]
    r = _c().put(f"/admin/api/targets/{code}/parser", json={"detail": "not-a-dict"})
    assert r.status_code == 400


def test_parser_404(db):
    assert _c().get("/admin/api/targets/nope/parser").status_code == 404
    assert _c().put("/admin/api/targets/nope/parser", json={}).status_code == 404


# ---------- category ----------

def test_category_put_and_clear(db):
    code = db["target_code"]
    r = _c().put(
        f"/admin/api/targets/{code}/category",
        json={"content_category": "财政", "content_subcategory": "预算"},
    )
    assert r.status_code == 200
    body = r.json()
    assert body["content_category"] == "财政"
    assert body["content_subcategory"] == "预算"

    # clear subcategory with empty string
    r2 = _c().put(
        f"/admin/api/targets/{code}/category",
        json={"content_subcategory": ""},
    )
    assert r2.status_code == 200
    assert r2.json()["content_subcategory"] is None
    # content_category untouched
    assert r2.json()["content_category"] == "财政"


def test_category_rejects_unknown_keys(db):
    r = _c().put(
        f"/admin/api/targets/{db['target_code']}/category",
        json={"category": "x"},  # wrong key name
    )
    assert r.status_code == 400


def test_category_404(db):
    r = _c().put("/admin/api/targets/nope/category", json={"content_category": "x"})
    assert r.status_code == 404


# ---------- managed_by + DELETE (direct-DB CRUD, §7.5.3 [2.0 patch]) ----------

def test_create_site_tags_managed_by_ui(db):
    """Sites created through the admin UI must be tagged managed_by='ui' so
    config.sync.sync_dir leaves them alone."""
    payload = {
        "site_code": "ui_only",
        "site_name": "UI 直建站",
        "base_url": "https://ui.example.com",
        "yaml_path": "config/sites/ui_only.yaml",
    }
    r = _c().post("/admin/api/sites", json=payload)
    assert r.status_code == 200
    assert r.json()["site"]["managed_by"] == "ui"

    # and the bare list surfaces the tag too
    body = _c().get("/admin/api/sites").json()
    row = next(s for s in body["sites"] if s["site_code"] == "ui_only")
    assert row["managed_by"] == "ui"


def test_delete_target_hard_deletes_and_keeps_articles(db):
    """Article.target_id is ondelete=SET NULL in Postgres — deleting a target
    must leave the article rows intact (only the FK is cleared on real DB).
    In the SQLite test harness we only assert the article rows survive.
    """
    target_code = db["target_code"]
    r = _c().delete(f"/admin/api/targets/{target_code}")
    assert r.status_code == 200
    assert r.json()["deleted"] is True

    with db["SM"]() as s:
        assert s.query(CrawlTarget).filter_by(target_code=target_code).first() is None
        # articles survive — we don't hard-delete content when a target is removed
        site_id = s.query(CrawlSite).filter_by(site_code="demo").one().id
        articles = s.query(Article).filter_by(site_id=site_id).all()
        assert len(articles) == 2
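

# The delete tests in this section each hinge on a different ON DELETE
# behaviour. A hedged sketch of column shapes that would produce them
# (hypothetical; the real definitions live in govcrawler.models):
#
#     target_id = Column(ForeignKey("crawl_target.id", ondelete="SET NULL"))
#     site_id   = Column(ForeignKey("crawl_site.id",   ondelete="RESTRICT"))
#     site_department_id = Column(
#         ForeignKey("site_department.id", ondelete="CASCADE"))
#
# SET NULL keeps article rows when a target dies; RESTRICT makes the DB
# refuse a site delete while articles reference it (the API surfaces that
# as 409); CASCADE would silently wipe targets along with their department,
# which is why the department endpoint refuses at the HTTP layer instead.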


def test_delete_target_404(db):
    r = _c().delete("/admin/api/targets/does_not_exist")
    assert r.status_code == 404


def test_delete_site_refuses_when_articles_reference(db):
    """Article.site_id is ondelete=RESTRICT — refuse to delete so we don't
    orphan historical content. Operator should disable the site instead."""
    r = _c().delete("/admin/api/sites/demo")
    assert r.status_code == 409
    assert "article" in r.json()["detail"].lower()

    # site still exists
    with db["SM"]() as s:
        assert s.query(CrawlSite).filter_by(site_code="demo").first() is not None


def test_delete_site_succeeds_when_no_articles(db):
    """Create a clean UI-only site with no articles, confirm DELETE wipes
    it plus its cascaded children."""
    _c().post("/admin/api/sites", json={
        "site_code": "del_me", "site_name": "待删",
        "base_url": "https://del.example.com",
        "yaml_path": "config/sites/del_me.yaml",
    })
    r = _c().delete("/admin/api/sites/del_me")
    assert r.status_code == 200
    assert r.json()["deleted"] is True

    with db["SM"]() as s:
        assert s.query(CrawlSite).filter_by(site_code="del_me").first() is None


def test_delete_site_404(db):
    r = _c().delete("/admin/api/sites/nope")
    assert r.status_code == 404


def test_delete_department_refuses_when_targets_reference(db):
    """crawl_target.site_department_id is ondelete=CASCADE — refuse deletion
    if any target still points at this dept so operators don't wipe a batch
    of targets by clicking one button."""
    # attach the demo target to a freshly created site_department
    with db["SM"]() as s:
        site = s.query(CrawlSite).filter_by(site_code="demo").one()
        dept = SiteDepartment(
            site_id=site.id, dept_path="finance",
            dept_binding="pending", dept_display_name="财政",
        )
        s.add(dept)
        s.flush()
        tgt = s.query(CrawlTarget).filter_by(target_code=db["target_code"]).one()
        tgt.site_department_id = dept.id
        s.commit()
        dept_id = dept.id

    r = _c().delete(f"/admin/api/departments/{dept_id}")
    assert r.status_code == 409
    assert "target" in r.json()["detail"].lower()


def test_delete_department_succeeds_when_no_targets(db):
    with db["SM"]() as s:
        site = s.query(CrawlSite).filter_by(site_code="demo").one()
        dept = SiteDepartment(
            site_id=site.id, dept_path="orphan",
            dept_binding="pending", dept_display_name="无目标部门",
        )
        s.add(dept)
        s.commit()
        dept_id = dept.id

    r = _c().delete(f"/admin/api/departments/{dept_id}")
    assert r.status_code == 200
    with db["SM"]() as s:
        assert s.get(SiteDepartment, dept_id) is None


def test_delete_department_404(db):
    r = _c().delete("/admin/api/departments/99999")
    assert r.status_code == 404


def test_sync_dir_skips_managed_by_ui_sites(db, tmp_path):
    """`config.sync.sync_dir` must leave UI-managed rows alone — otherwise the
    instant someone runs the sync CLI, every UI-created site gets disabled."""
    from govcrawler.config.sync import sync_dir

    # create a UI-managed site via the API (tags it managed_by='ui')
    _c().post("/admin/api/sites", json={
        "site_code": "ui_island",
        "site_name": "UI 孤岛",
        "base_url": "https://island.example.com",
        "yaml_path": "config/sites/ui_island.yaml",
        "enabled": True,
    })

    empty_yaml_dir = tmp_path / "empty_yaml"
    empty_yaml_dir.mkdir()

    with db["SM"]() as s:
        report = sync_dir(s, empty_yaml_dir)
        s.commit()

    # The demo fixture site is yaml-managed (default) and gets disabled
    # because it's not in the empty dir. The UI site must stay enabled.
    with db["SM"]() as s:
        ui_row = s.query(CrawlSite).filter_by(site_code="ui_island").one()
        assert ui_row.enabled is True
        assert ui_row.managed_by == "ui"
        demo_row = s.query(CrawlSite).filter_by(site_code="demo").one()
        assert demo_row.enabled is False

    # the sync report's warnings must not flag the UI row as disabled
    assert all("ui_island" not in w for w in report.warnings)


def test_openstd_captcha_download_inserts_attachment(db, monkeypatch):
    from govcrawler.api.admin import articles as articles_mod

    with db["SM"]() as s:
        site, target = make_site_and_target(
            s,
            site_code="openstd_samr",
            column_id="mandatory_national_standards",
        )
        article = Article(
            id=9001,
            site_id=site.id,
            target_id=target.id,
            native_post_id="8DFBEDE88D831247BF30901B1B42E8D0",
            url="https://openstd.samr.gov.cn/bzgk/std/newGbInfo?hcno=8DF",
            url_hash="d" * 64,
            title="移动电源安全技术规范",
            doc_no="GB 47372-2026",
            status="ready",
            content_text="标准号：GB 47372-2026",
            has_attachment=False,
            fetch_strategy="httpx",
            fetched_at=datetime.utcnow(),
            metadata_json={
                "public_meta": {
                    "openstd_hcno": "8DFBEDE88D831247BF30901B1B42E8D0",
                    "std_no": "GB 47372-2026",
                }
            },
        )
        s.add(article)
        s.commit()

    sess = SimpleNamespace(
        session_id="sess_1",
        article_id=9001,
        captcha_bytes=b"jpg",
        captcha_content_type="image/jpeg",
        expires_at=9999999999,
    )
    monkeypatch.setattr(articles_mod.openstd_download, "start_session", lambda *a, **k: sess)
    monkeypatch.setattr(articles_mod.openstd_download, "get_session", lambda *a, **k: sess)
    monkeypatch.setattr(articles_mod.openstd_download, "refresh_captcha", lambda *a, **k: sess)
    monkeypatch.setattr(articles_mod.openstd_download, "close_session", lambda *a, **k: None)
    monkeypatch.setattr(
        articles_mod.openstd_download,
        "submit_captcha_and_download",
        lambda *a, **k: DownloadedAttachment(
            file_name="GB_47372-2026.pdf",
            file_ext="pdf",
            size_bytes=7,
            file_hash="e" * 64,
            file_path=PurePosixPath("openstd_samr/attachments/GB_47372-2026.pdf"),
        ),
    )

    client = _c()
    start = client.post("/admin/api/articles/9001/openstd-download/start")
    assert start.status_code == 200
    assert start.json()["captcha_url"].endswith("/admin/api/openstd-download-sessions/sess_1/captcha")

    captcha = client.get("/admin/api/openstd-download-sessions/sess_1/captcha")
    assert captcha.status_code == 200
    assert captcha.content == b"jpg"

    submit = client.post(
        "/admin/api/openstd-download-sessions/sess_1/submit",
        json={"captcha": "1234"},
    )
    assert submit.status_code == 200
    body = submit.json()
    assert body["attachment"]["file_name"] == "GB_47372-2026.pdf"
    with db["SM"]() as s:
        article = s.get(Article, 9001)
        assert article.has_attachment is True
        att = s.query(Attachment).filter_by(article_id=9001).one()
        assert att.file_hash == "e" * 64
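

# The openstd download flow exercised above, step by step (endpoints as
# used in this test):
#   1. POST /admin/api/articles/{id}/openstd-download/start
#        -> opens a session and returns a captcha_url
#   2. GET  /admin/api/openstd-download-sessions/{sid}/captcha
#        -> streams the captcha image bytes
#   3. POST /admin/api/openstd-download-sessions/{sid}/submit {"captcha": ...}
#        -> downloads the standard PDF, inserts an Attachment row, and
#           flips Article.has_attachment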


def test_standard_downloader_role_only_sees_download_page_apis(db, monkeypatch):
    monkeypatch.setenv("ADMIN_USER", "admin")
    monkeypatch.setenv("ADMIN_PASSWORD", "admin-pass")
    monkeypatch.setenv("STANDARD_DOWNLOADER_USER", "std")
    monkeypatch.setenv("STANDARD_DOWNLOADER_PASSWORD", "std-pass")

    with db["SM"]() as s:
        site, target = make_site_and_target(
            s,
            site_code="openstd_samr",
            column_id="mandatory_national_standards",
        )
        article = Article(
            id=9101,
            site_id=site.id,
            target_id=target.id,
            native_post_id="810C43CEBDDA1F7D475550FF18A61A90",
            url="https://openstd.samr.gov.cn/bzgk/std/newGbInfo?hcno=810C",
            url_hash="f" * 64,
            title="轧制设备安全技术条件",
            doc_no="GB 47367-2026",
            status="ready",
            content_text="标准元数据",
            has_attachment=False,
            fetch_strategy="httpx",
            fetched_at=datetime.utcnow(),
        )
        s.add(article)
        s.flush()
        s.add(ArticleStandardMeta(
            article_id=article.id,
            standard_no="GB 47367-2026",
            chinese_title="轧制设备安全技术条件",
            standard_status="即将实施",
            standard_type="强制性国家标准",
        ))
        s.commit()

    client = _c()
    auth = ("std", "std-pass")
    assert client.get("/admin/api/me", auth=auth).json()["role"] == "standard_downloader"

    pending = client.get("/admin/api/standard-attachments/pending", auth=auth)
    assert pending.status_code == 200
    body = pending.json()
    assert body["total"] == 1
    assert body["items"][0]["id"] == 9101

    forbidden = client.get("/admin/api/sites", auth=auth)
    assert forbidden.status_code == 403

    admin_ok = client.get("/admin/api/sites", auth=("admin", "admin-pass"))
    assert admin_ok.status_code == 200
