"""One-shot: backfill crawl_target.content_category for rows where it's
NULL, using the same _classify_column_name keyword heuristic as the
bulk-create endpoints. Operator-set (non-NULL) values aren't touched, and
rows whose name matches no keyword are left NULL.
"""
from __future__ import annotations

import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from govcrawler.api.admin.targets import _classify_column_name  # noqa: E402
from govcrawler.db import get_sessionmaker  # noqa: E402
from govcrawler.models import CrawlTarget  # noqa: E402


def main() -> int:
    """Backfill ``content_category`` on crawl targets that currently have none.

    Selects every ``CrawlTarget`` whose ``content_category`` is NULL and
    classifies it with ``_classify_column_name``, fed the row's
    ``channel_name``, falling back to ``target_name`` and finally to an
    empty string. Rows for which the classifier returns ``None`` are left
    as they are. All assignments are committed together after the loop,
    and a one-line summary is printed to stdout.

    Returns:
        Always ``0``; the script entry point uses it as the exit status.
    """
    session_factory = get_sessionmaker()
    n_set = 0
    n_no_match = 0
    with session_factory() as session:
        unlabelled = session.query(CrawlTarget).filter(
            CrawlTarget.content_category.is_(None)
        )
        for target in unlabelled.all():
            # Prefer the channel name; fall back to the target name, then "".
            display_name = target.channel_name or target.target_name or ""
            category = _classify_column_name(display_name)
            if category is not None:
                target.content_category = category
                n_set += 1
            else:
                n_no_match += 1
        # Single commit: every backfilled row is persisted in one transaction.
        session.commit()
    print(f"set {n_set} crawl_target rows; {n_no_match} unchanged (no keyword match)")
    return 0


if __name__ == "__main__":
    # Run the backfill and hand main()'s return value to the interpreter
    # as the process exit status.
    sys.exit(main())
