"""v2 YAML loader + yaml→DB sync tests.

Covers:
  * Loader rejects malformed dept_binding + local_dept_id pairings
  * Loader auto-fills yaml_path when cms_adapter absent (crawl_site CHECK)
  * Sync is idempotent: running twice leaves DB identical (including no-ops)
  * Sync disables (not deletes) orphan rows when a YAML is removed
  * Real sample YAMLs in config/sites_v2/ load and sync cleanly
"""
from __future__ import annotations

from pathlib import Path

import pytest
from pydantic import ValidationError
from sqlalchemy import create_engine, event, select
from sqlalchemy.orm import Session

from govcrawler.config.sync import UnknownLocalDept, sync_dir, sync_file, sync_site
from govcrawler.config.v2 import SiteDocV2, load_v2, load_v2_dir
from govcrawler.models import Base, CrawlSite, CrawlTarget, LocalDepartment, SiteDepartment
from govcrawler.repositories import depts, sites, targets

REPO_ROOT = Path(__file__).resolve().parents[1]
V2_DIR = REPO_ROOT / "config" / "sites_v2"


# ---------------------------------------------------------------------------
# engine fixture (in-memory SQLite w/ FKs on)
# ---------------------------------------------------------------------------
@pytest.fixture
def engine():
    """Yield an in-memory SQLite engine with the ORM schema created.

    A ``connect`` listener issues ``PRAGMA foreign_keys=ON`` on each new
    DBAPI connection so foreign-key constraints are enforced during tests.
    The engine is disposed once the test using it has finished.
    """
    db_engine = create_engine("sqlite://", future=True)

    @event.listens_for(db_engine, "connect")
    def _enable_foreign_keys(raw_connection, _record):
        # The pragma is per-connection, hence a listener rather than a
        # one-off statement.
        cursor = raw_connection.cursor()
        cursor.execute("PRAGMA foreign_keys=ON")
        cursor.close()

    Base.metadata.create_all(db_engine)
    yield db_engine
    db_engine.dispose()


@pytest.fixture
def session(engine):
    """Yield a Session bound to the test engine, pre-seeded with one dept.

    The v2 YAML samples reference OA snapshot rows. In production those come
    from a separate OA-sync job; here a single ``LocalDepartment`` (id 301)
    is stubbed in and flushed before the test body runs.
    """
    stub_dept = LocalDepartment(dept_id=301, dept_name="统计局")
    with Session(engine) as db_session:
        db_session.add(stub_dept)
        db_session.flush()
        yield db_session


def test_sync_fails_when_local_dept_missing(engine):
    """Syncing a mapped dept whose local_dept_id is unknown must raise.

    ``qingcheng_fgw.yaml`` carries a ``mapped`` dept_binding pointing at
    local_dept_id 301. With no matching ``LocalDepartment`` row in the DB,
    ``sync_site`` must raise ``UnknownLocalDept`` mentioning that id. The
    intent is that the failure happens BEFORE any DB write, which forces the
    OA snapshot sync to run first and prevents orphan rows; note that this
    test asserts the exception only, not the absence of writes.
    """
    # SyncReport is not among the module-level imports, so bring it in here.
    from govcrawler.config.sync import SyncReport

    # Use the bare `engine` fixture (not `session`) so the DB has NO seeded
    # LocalDepartment row.
    with Session(engine) as s:
        doc = load_v2(V2_DIR / "qingcheng_fgw.yaml")
        with pytest.raises(UnknownLocalDept, match="301"):
            sync_site(s, doc, SyncReport())


# ---------------------------------------------------------------------------
# LAYER A — loader
# ---------------------------------------------------------------------------
class TestLoaderValidation:
    """Loader checks: real samples parse, malformed documents are rejected."""

    def test_real_samples_load(self):
        """Every YAML in config/sites_v2/ must parse without error."""
        loaded = load_v2_dir(V2_DIR)
        for expected_code in ("gdqy", "qingcheng_fgw"):
            assert expected_code in loaded

    def test_gdqy_has_site_level_columns_no_depts(self):
        """gdqy declares columns directly on the site and has no depts."""
        gdqy_doc = load_v2(V2_DIR / "gdqy.yaml")
        assert gdqy_doc.depts == []
        assert len(gdqy_doc.columns) >= 1
        # gdqy uses yaml_path, not cms_adapter
        assert gdqy_doc.site.cms_adapter is None

    def test_qingcheng_fgw_has_three_dept_bindings(self):
        """qingcheng_fgw covers three binding kinds; the mapped one has an id."""
        fgw_doc = load_v2(V2_DIR / "qingcheng_fgw.yaml")

        seen_bindings = set()
        mapped_depts = []
        for dept in fgw_doc.depts:
            seen_bindings.add(dept.dept_binding)
            if dept.dept_binding == "mapped":
                mapped_depts.append(dept)

        assert seen_bindings == {"mapped", "city_level", "cross_dept"}
        # mapped dept must carry local_dept_id
        first_mapped = mapped_depts[0]
        assert first_mapped.local_dept_id == 301

    def test_mapped_without_local_dept_rejected(self, tmp_path):
        """A ``mapped`` binding that omits local_dept_id fails validation."""
        yaml_text = (
            "site:\n"
            "  code: bad\n"
            "  cms_adapter: x\n"
            "depts:\n"
            "  - dept_path: foo\n"
            "    dept_binding: mapped\n"
            "    columns: []\n"
        )
        yaml_file = tmp_path / "bad.yaml"
        yaml_file.write_text(yaml_text, encoding="utf-8")

        with pytest.raises(ValidationError):
            load_v2(yaml_file)

    def test_city_level_with_local_dept_rejected(self, tmp_path):
        """A ``city_level`` binding that supplies local_dept_id fails validation."""
        yaml_text = (
            "site:\n"
            "  code: bad2\n"
            "  cms_adapter: x\n"
            "depts:\n"
            "  - dept_path: foo\n"
            "    dept_binding: city_level\n"
            "    local_dept_id: 42\n"
            "    columns: []\n"
        )
        yaml_file = tmp_path / "bad2.yaml"
        yaml_file.write_text(yaml_text, encoding="utf-8")

        with pytest.raises(ValidationError):
            load_v2(yaml_file)

    def test_filename_must_match_site_code(self, tmp_path):
        """A file whose stem differs from ``site.code`` is refused."""
        yaml_text = "site:\n  code: other_code\n  cms_adapter: x\n"
        yaml_file = tmp_path / "wrong_name.yaml"
        yaml_file.write_text(yaml_text, encoding="utf-8")

        with pytest.raises(ValueError, match="filename stem"):
            load_v2(yaml_file)

    def test_duplicate_column_id_rejected(self, tmp_path):
        """The same column_id under two depts of one site fails validation."""
        yaml_text = (
            "site:\n"
            "  code: dup\n"
            "  cms_adapter: x\n"
            "depts:\n"
            "  - dept_path: a\n"
            "    dept_binding: pending\n"
            "    columns:\n"
            "      - column_id: \"111\"\n"
            "  - dept_path: b\n"
            "    dept_binding: pending\n"
            "    columns:\n"
            "      - column_id: \"111\"\n"
        )
        yaml_file = tmp_path / "dup.yaml"
        yaml_file.write_text(yaml_text, encoding="utf-8")

        with pytest.raises(ValidationError, match="site-unique"):
            load_v2(yaml_file)


# ---------------------------------------------------------------------------
# LAYER B — sync
# ---------------------------------------------------------------------------
class TestSync:
    """yaml→DB sync checks: population, idempotence and disable-not-delete."""

    @staticmethod
    def _snapshot(session):
        """Return the sorted identifying keys of all site, dept and target rows.

        Used to compare DB contents before and after a repeated sync.
        """
        return {
            "sites": sorted(s.site_code for s in session.scalars(select(CrawlSite))),
            "depts": sorted((d.site_id, d.dept_path) for d in session.scalars(select(SiteDepartment))),
            "targets": sorted(t.target_code for t in session.scalars(select(CrawlTarget))),
        }

    def test_sync_real_dir_populates_db(self, session):
        """Syncing the sample directory creates the expected rows and report."""
        report = sync_dir(session, V2_DIR)
        session.flush()

        # crawl_site rows
        codes = {s.site_code for s in session.scalars(select(CrawlSite))}
        assert {"gdqy", "qingcheng_fgw", "flk_npc"} <= codes

        # gdqy: yaml_path stamped, cms_adapter NULL → XOR constraint satisfied
        gdqy = sites.get_by_code(session, "gdqy")
        assert gdqy.cms_adapter is None
        assert gdqy.yaml_path and gdqy.yaml_path.endswith("gdqy.yaml")

        # qingcheng_fgw: adapter path, yaml_path NULL
        qc = sites.get_by_code(session, "qingcheng_fgw")
        assert qc.cms_adapter == "gkmlpt"
        assert qc.yaml_path is None

        # qingcheng_fgw has 3 depts
        qc_depts = depts.list_for_site(session, qc.id)
        assert len(qc_depts) == 3

        # targets under the mapped dept
        mapped = next(d for d in qc_depts if d.dept_binding == "mapped")
        mapped_targets = targets.list_for_dept(session, mapped.id)
        assert len(mapped_targets) == 1
        assert mapped_targets[0].target_code == "qingcheng_fgw__qycsj__1234"

        # site-level targets on gdqy (no dept)
        gdqy_targets = targets.list_for_site(session, gdqy.id)
        assert any(t.site_department_id is None for t in gdqy_targets)

        flk = sites.get_by_code(session, "flk_npc")
        assert flk.cms_adapter == "flk_npc"
        flk_targets = targets.list_for_site(session, flk.id)
        assert len(flk_targets) == 1
        assert flk_targets[0].target_code == "flk_npc__laws"
        assert flk_targets[0].track_checkpoint is True

        # report numbers: first run is all creates
        assert report.sites_created == 3
        assert report.sites_updated == 0
        assert report.depts_created == 3
        # 3 qingcheng_fgw dept columns + 1 gdqy site-level + 1 flk_npc = 5
        assert report.targets_created == 5

    def test_sync_is_idempotent(self, session):
        """Second run must produce the same DB state and zero creates."""
        r1 = sync_dir(session, V2_DIR)
        session.commit()
        before = self._snapshot(session)

        r2 = sync_dir(session, V2_DIR)
        session.commit()
        after = self._snapshot(session)

        assert before == after
        assert r2.sites_created == 0
        assert r2.depts_created == 0
        assert r2.targets_created == 0
        assert r2.sites_updated == r1.sites_created  # update-count on 2nd pass = create-count on 1st

    def test_removed_yaml_disables_not_deletes(self, session, tmp_path):
        """Drop a YAML from the dir → DB rows get enabled=False, not removed."""
        import shutil

        # Stage: copy every sample YAML into a scratch dir. Only regular
        # files are copied; shutil.copy cannot copy a directory and would
        # abort the test if one ever appeared in the sample dir.
        for p in V2_DIR.iterdir():
            if p.is_file():
                shutil.copy(p, tmp_path / p.name)

        sync_dir(session, tmp_path)
        session.commit()
        qc = sites.get_by_code(session, "qingcheng_fgw")
        assert qc is not None and qc.enabled is True

        # Drop the qingcheng_fgw YAML and resync
        (tmp_path / "qingcheng_fgw.yaml").unlink()
        report = sync_dir(session, tmp_path)
        session.commit()

        qc_after = sites.get_by_code(session, "qingcheng_fgw")
        assert qc_after is not None  # row still exists
        assert qc_after.enabled is False  # just disabled
        assert report.sites_disabled == 1

    def test_disable_column_in_yaml_disables_target(self, session, tmp_path):
        """Editing YAML to remove a column should disable (not delete) the target."""
        src = V2_DIR / "qingcheng_fgw.yaml"
        dst = tmp_path / "qingcheng_fgw.yaml"
        dst.write_text(src.read_text(encoding="utf-8"), encoding="utf-8")
        sync_file(session, dst)
        session.commit()

        tc = "qingcheng_fgw__qycsj__1234"
        assert targets.get_by_code(session, tc).enabled is True

        # Rewrite YAML removing the qycsj column_id 1234 entirely
        new_yaml = """site:
  code: qingcheng_fgw
  name: 清城区发改局
  cms_adapter: gkmlpt
  default_fetch_strategy: httpx
depts:
  - dept_path: qycsj
    dept_binding: mapped
    local_dept_id: 301
    columns: []
"""
        dst.write_text(new_yaml, encoding="utf-8")
        report = sync_file(session, dst)
        session.commit()

        tgt = targets.get_by_code(session, tc)
        assert tgt is not None
        assert tgt.enabled is False
        assert report.targets_disabled >= 1


# ---------------------------------------------------------------------------
# integration: what a fresh operator experiences
# ---------------------------------------------------------------------------
def test_fresh_install_bootstrap(session):
    """End-to-end: start with no sites, sync the sample dir, query via repos."""
    # Precondition: no crawl_site rows exist yet.
    first_site = session.scalar(select(CrawlSite))
    assert first_site is None

    sync_dir(session, V2_DIR)
    session.commit()

    # Mirrors the "list enabled sites and their columns" query the admin API
    # will run — every target code must be namespaced by its site code,
    # which shows the repository chain is wired up after a sync.
    for enabled_site in sites.list_enabled(session):
        expected_prefix = enabled_site.site_code + "__"
        site_targets = targets.list_for_site(session, enabled_site.id)
        for target in site_targets:
            assert target.target_code.startswith(expected_prefix)
