"""One-shot: backfill local_department.region from the Excel files in
`清远市及县区政府组成部门和乡镇/*.xlsx`.

Each xlsx corresponds to one region (清远市直/八县区). For every row we upsert
`region` onto the matching local_department.dept_id. Rows not in any xlsx keep
`region = NULL`; the admin UI's UI still has name/id-range fallbacks for those.

Idempotent — running again is safe and just overwrites region with the xlsx
truth. Run after deploying migration 0004_add_local_dept_region.
"""
from __future__ import annotations

import os
import sys
from pathlib import Path

import openpyxl
from sqlalchemy import update

# Make `govcrawler` importable when running as a script
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from govcrawler.db import get_sessionmaker  # noqa: E402
from govcrawler.models import LocalDepartment  # noqa: E402

SOURCE_DIR = Path(__file__).resolve().parent.parent / "清远市及县区政府组成部门和乡镇"

FILE_REGION = {
    "01清远市.xlsx": "清远市",
    "02清城.xlsx": "清城区",
    "03清新.xlsx": "清新区",
    "04英德.xlsx": "英德市",
    "05佛冈.xlsx": "佛冈县",
    "06连州.xlsx": "连州市",
    "07连山.xlsx": "连山壮族瑶族自治县",
    "08连南.xlsx": "连南瑶族自治县",
    "09阳山.xlsx": "阳山县",
}


def build_mapping() -> dict[int, str]:
    mapping: dict[int, str] = {}
    for fname, region in FILE_REGION.items():
        wb = openpyxl.load_workbook(SOURCE_DIR / fname)
        for sn in wb.sheetnames:
            ws = wb[sn]
            for row in ws.iter_rows(min_row=2, values_only=True):
                if not row or row[0] is None:
                    continue
                try:
                    did = int(row[0])
                except (TypeError, ValueError):
                    continue
                prev = mapping.get(did)
                if prev and prev != region:
                    print(f"CONFLICT dept_id={did} {prev} vs {region} ({fname}) — keeping first")
                    continue
                mapping[did] = region
    return mapping


def main() -> None:
    mapping = build_mapping()
    print(f"resolved {len(mapping)} dept_id → region rows from {len(FILE_REGION)} xlsx files")

    SM = get_sessionmaker()
    with SM() as s:
        # Update in one pass per region (fewer statements) — still correct if
        # target rows are missing.
        by_region: dict[str, list[int]] = {}
        for did, region in mapping.items():
            by_region.setdefault(region, []).append(did)
        total = 0
        for region, ids in by_region.items():
            n = s.execute(
                update(LocalDepartment)
                .where(LocalDepartment.dept_id.in_(ids))
                .values(region=region)
            ).rowcount
            total += n
            print(f"  {region:20} → {n} rows updated")
        s.commit()
        print(f"done — {total} rows touched")


if __name__ == "__main__":
    main()
