"""One-shot: backfill site_department rows from existing crawl_target dept_paths.

Background: bulk_create_targets used to create crawl_target rows without an
accompanying site_department row, so the 部门绑定对账 page (which lists
site_department) showed only depts created via the v2 yaml sync. fogang /
yingde dept_paths were invisible despite having dozens of targets.

This script:
  1) For every crawl_target that has site_department_id=NULL and whose
     target_code has the form `<site>__<dept>__<col>` (dept_path is parsed
     from target_code; site-level `<site>__<col>` targets are skipped),
     upsert a site_department row keyed by (site_id, dept_path).
  2) Set crawl_target.site_department_id to point at it.
  3) When the target carries a dept_id (local OA dept FK), promote a
     pending site_department to mapped + local_dept_id (only if it was
     pending — never overwrite an operator-set binding).

Idempotent: running again is a no-op once everything is linked.
"""
from __future__ import annotations

import sys
from collections import defaultdict
from pathlib import Path

# Make `govcrawler` importable when running as a script
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from sqlalchemy import select  # noqa: E402

from govcrawler.db import get_sessionmaker  # noqa: E402
from govcrawler.models import CrawlSite, CrawlTarget, SiteDepartment  # noqa: E402


def _decompose(site_code: str, target_code: str) -> str | None:
    """Return dept_path from `<site>__<dept>__<col>`; None for `<site>__<col>`."""
    prefix = f"{site_code}__"
    if not target_code.startswith(prefix):
        return None
    tail = target_code[len(prefix):]
    parts = tail.split("__", 1)
    return parts[0] if len(parts) == 2 else None


def main() -> int:
    """Link unlinked crawl targets to site_department rows, creating rows as needed.

    Loads every CrawlTarget whose site_department_id is NULL together with
    its CrawlSite, keeps the ones whose target_code yields a dept_path, and
    groups them by (site id, dept_path). For each group the matching
    SiteDepartment is looked up; if missing it is created, and if it exists
    with binding "pending" while the group carries a dept_id it is promoted
    to "mapped". Every target in the group is then pointed at that row.
    Changes are committed once, after all groups are processed.

    Returns:
        0 on every non-raising path, suitable as a process exit status.
    """
    session_factory = get_sessionmaker()
    with session_factory() as session:
        unlinked_stmt = (
            select(CrawlTarget, CrawlSite)
            .join(CrawlSite, CrawlSite.id == CrawlTarget.site_id)
            .where(CrawlTarget.site_department_id.is_(None))
        )

        # (site_id, dept_path) → unlinked targets sharing that department.
        site_lookup: dict[int, CrawlSite] = {}
        pending_links: dict[tuple[int, str], list[CrawlTarget]] = defaultdict(list)
        for target, owner in session.execute(unlinked_stmt).all():
            path = _decompose(owner.site_code, target.target_code)
            if path:
                pending_links[(owner.id, path)].append(target)
                site_lookup[owner.id] = owner
            # else: site-level target (e.g. wy_news__domestic) — nothing to link

        if len(pending_links) == 0:
            print("nothing to backfill — all targets already linked.")
            return 0

        n_created = 0
        n_promoted = 0
        n_linked = 0

        for group_key, members in pending_links.items():
            site_pk, path = group_key
            owner = site_lookup[site_pk]

            # First non-NULL dept_id found among the group's targets, if any.
            local_dept = None
            for member in members:
                if member.dept_id is not None:
                    local_dept = member.dept_id
                    break

            lookup_stmt = select(SiteDepartment).where(
                SiteDepartment.site_id == site_pk,
                SiteDepartment.dept_path == path,
            )
            department = session.execute(lookup_stmt).scalar_one_or_none()

            if department is None:
                if local_dept is not None:
                    binding = "mapped"
                else:
                    binding = "pending"
                department = SiteDepartment(
                    site_id=site_pk,
                    dept_path=path,
                    local_dept_id=local_dept,
                    dept_binding=binding,
                )
                session.add(department)
                # Flush so the new row has its id before targets reference it.
                session.flush()
                n_created += 1
                print(
                    f"  + site_department site={owner.site_code} dept_path={path} "
                    f"binding={department.dept_binding} local_dept_id={local_dept}"
                )
            else:
                # Only a still-pending row is promoted; rows in any other
                # binding state are left exactly as they are.
                promotable = (
                    department.dept_binding == "pending" and local_dept is not None
                )
                if promotable:
                    department.local_dept_id = local_dept
                    department.dept_binding = "mapped"
                    n_promoted += 1
                    print(
                        f"  ↑ promote site={owner.site_code} dept_path={path} "
                        f"→ mapped(local_dept_id={local_dept})"
                    )

            department_pk = department.id
            for member in members:
                member.site_department_id = department_pk
            n_linked += len(members)

        session.commit()
        print(
            f"\ndone. created={n_created} promoted={n_promoted} "
            f"linked_targets={n_linked}"
        )
        return 0


if __name__ == "__main__":
    # Script entry point: exit with the status code that main() returns.
    sys.exit(main())
