"""One-shot: copy the existing postgres govcrawler dataset into the MySQL
deployment at 192.168.1.222. Uses the shared SQLAlchemy ORM so type
conversions (JSON, DateTime, Boolean) happen for free.

Order is FK-safe. local_department is upserted (some rows already seeded
from xlsx); every other table is expected to be empty on the MySQL side.

Run once after switching .env to MySQL and applying migrations:
  uv run python scripts/pg_to_mysql.py
"""
from __future__ import annotations

from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session

from govcrawler.models import (
    Article,
    Attachment,
    Base,
    CrawlLog,
    CrawlSite,
    CrawlTarget,
    LocalDepartment,
    MasterColumnRegistry,
    SiteDepartment,
)

# Imported here (rather than at top) kept within this setup span; hoisted above
# the constants so the import precedes all executable statements in the span.
from govcrawler.settings import get_settings

# Source: the legacy Postgres deployment — fixed URL, this is a one-shot script.
PG_URL = "postgresql+psycopg://govcrawler:govcrawler@localhost:5433/govcrawler"
# Target: re-read the current MySQL URL from settings so we stay in sync with .env.
MY_URL = get_settings().db_url
print(f"source (pg):     {PG_URL}")
print(f"target (mysql):  {MY_URL}")

# create_engine() is lazy: no connection is opened until a Session uses it.
pg_eng = create_engine(PG_URL)
my_eng = create_engine(MY_URL)


def _copy_all(src: Session, dst: Session, model, *, upsert_on_pk: bool = False) -> int:
    """Copy every row of `model`'s table from src to dst.

    Args:
        src: session bound to the source (Postgres) engine.
        dst: session bound to the target (MySQL) engine.
        model: ORM mapped class whose table is copied wholesale, primary
            keys included (required for FK integrity on the target).
        upsert_on_pk: when True, rows whose primary key already exists on
            dst are updated column-by-column in place instead of skipped;
            only genuinely new rows count toward the return value.

    Returns:
        Number of rows newly inserted into dst (in-place updates are not
        counted). Rows are flushed but NOT committed — caller commits.
    """
    cols = list(model.__table__.columns)  # hoisted: invariant per table
    pk_cols = [c.name for c in model.__table__.primary_key.columns]
    inserted = 0
    # Iterate the result lazily instead of materializing the whole table
    # up front — article/attachment may be large. src and dst use separate
    # connections, so interleaving reads with dst.get() queries is safe.
    for row in src.execute(select(model)).scalars():
        payload = {c.name: getattr(row, c.name) for c in cols}
        if upsert_on_pk:
            pk_vals = tuple(payload[name] for name in pk_cols)
            # Session.get wants a scalar for single-column PKs, a tuple otherwise.
            existing = dst.get(model, pk_vals[0] if len(pk_vals) == 1 else pk_vals)
            if existing is not None:
                # Refresh every non-PK column on the already-seeded row.
                for name, value in payload.items():
                    if name not in pk_cols:
                        setattr(existing, name, value)
                continue
        dst.add(model(**payload))
        inserted += 1
    dst.flush()
    return inserted


def main() -> None:
    """Copy every table from Postgres to MySQL in FK-safe order, then commit."""
    # (label, model, upsert?) in dependency order — parents before children.
    # Only local_department is upserted; every other table is expected empty.
    steps: list[tuple[str, type, bool]] = [
        ("crawl_site", CrawlSite, False),
        ("local_department (upsert — 339 already seeded from xlsx)", LocalDepartment, True),
        ("site_department", SiteDepartment, False),
        ("crawl_target", CrawlTarget, False),
        ("master_column_registry", MasterColumnRegistry, False),
        ("article", Article, False),
        ("attachment", Attachment, False),
        ("crawl_log", CrawlLog, False),
    ]
    with Session(pg_eng) as src, Session(my_eng) as dst:
        for step_no, (label, model, upsert) in enumerate(steps, start=1):
            print(f"\n== step {step_no}: {label} ==")
            n = _copy_all(src, dst, model, upsert_on_pk=upsert)
            if upsert:
                print(f"  inserted {n} new rows (existing rows refreshed in place)")
            else:
                print(f"  inserted {n} rows")
        # Single commit at the end: the migration is all-or-nothing on the target.
        dst.commit()
        print("\nALL DONE — target committed")


# Standard entry guard: run the migration only when executed as a script.
if __name__ == "__main__":
    main()
