"""Runtime enable/disable for sites and columns (Phase 2 启停开关).

Rewrites the YAML file at `config/sites/<site>.yaml` in place using ruamel.yaml,
which preserves comments + key ordering. After the rewrite, `set_enabled` itself
calls `registry.reload()` to bust the in-process cache, so callers do not need to.

Intent: operators can flip a misbehaving column off without editing Python,
running a migration, or touching the DB. The YAML remains the single source of
truth — the scheduler skips disabled jobs on next reload, the crawler raises
if an explicit `crawl <site> <col>` hits a disabled column (we want loud failure
on manual invocation of something the operator just turned off).
"""
from __future__ import annotations

import logging
from pathlib import Path

from ruamel.yaml import YAML

from govcrawler.config.registry import DEFAULT_CONFIG_DIR, reload as reload_registry

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Single ruamel.yaml instance shared by every load and dump in this module, so
# files are read and written back with the same settings.
_yaml = YAML()
# Keep the original quoting style of string scalars when a file is re-dumped
# (per ruamel.yaml's round-trip options).
_yaml.preserve_quotes = True
# Output indentation: 2 spaces for mappings; sequences indented by 4 with the
# dash offset by 2.
_yaml.indent(mapping=2, sequence=4, offset=2)


def _site_path(site_id: str, root: Path | None = None) -> Path:
    root = root or DEFAULT_CONFIG_DIR
    p = Path(root) / f"{site_id}.yaml"
    if not p.exists():
        alt = Path(root) / f"{site_id}.yml"
        if alt.exists():
            return alt
        raise FileNotFoundError(f"no YAML for site {site_id!r} under {root}")
    return p


def set_enabled(
    site_id: str,
    *,
    column_id: str | None = None,
    enabled: bool,
    root: Path | None = None,
) -> Path:
    """Flip `enabled:` on a site or column YAML node; return the file path.

    - `column_id=None`: toggles the site-level `enabled`.
    - `column_id=<id>`: finds the first column whose `column_id` matches and
      toggles its `enabled`.

    The file is loaded and re-dumped through the module's shared ruamel.yaml
    instance, then `reload_registry()` is called so the change is picked up.

    Args:
        site_id: Site whose `<site_id>.yaml` / `.yml` file is rewritten.
        column_id: Column to toggle, or `None` for the site-level flag.
        enabled: New value; coerced with `bool()` before being stored.
        root: Directory holding the site files; defaults to
            `DEFAULT_CONFIG_DIR` (resolved by `_site_path`).

    Returns:
        Path of the YAML file that was rewritten.

    Raises:
        FileNotFoundError: no YAML file exists for `site_id`.
        ValueError: the file is empty or its top-level node is not a mapping.
        KeyError: `column_id` was given but no column has that id.
    """
    import io

    path = _site_path(site_id, root=root)
    with open(path, "r", encoding="utf-8") as f:
        data = _yaml.load(f)
    if data is None:
        raise ValueError(f"empty YAML: {path}")
    if not isinstance(data, dict):
        # A top-level list/scalar cannot carry an `enabled` key; fail with a
        # clear message instead of an AttributeError/TypeError further down.
        raise ValueError(
            f"top-level YAML node in {path} is {type(data).__name__}, expected a mapping"
        )

    if column_id is None:
        data["enabled"] = bool(enabled)
        log.info("toggle site=%s enabled=%s", site_id, enabled)
    else:
        cols = data.get("columns") or []
        if not isinstance(cols, list):
            cols = []
        # Malformed (non-mapping) entries can never match, so skip them rather
        # than crash on `.get`.
        target = next(
            (
                c
                for c in cols
                if isinstance(c, dict) and c.get("column_id") == column_id
            ),
            None,
        )
        if target is None:
            raise KeyError(f"column {column_id!r} not found in site {site_id!r}")
        target["enabled"] = bool(enabled)
        log.info("toggle site=%s column=%s enabled=%s", site_id, column_id, enabled)

    # Serialize fully in memory BEFORE opening the file for writing: opening
    # with "w" truncates immediately, so a dump error after that point would
    # leave the config file empty or half-written.
    rendered = io.StringIO()
    _yaml.dump(data, rendered)
    with open(path, "w", encoding="utf-8") as f:
        f.write(rendered.getvalue())
    reload_registry()
    return path


def list_sites(root: Path | None = None) -> list[dict]:
    """Return a stable-ordered summary of every site + column with its enabled flag.

    Reads each YAML directly (not the pydantic loader) so disabled/broken configs
    still show up — operators need to see the full picture.

    Args:
        root: Directory to scan; defaults to `DEFAULT_CONFIG_DIR`. Only files
            with a `.yaml` / `.yml` suffix (case-insensitive) are considered,
            in sorted path order.

    Returns:
        One dict per file. A readable, well-formed file yields
        `{"site_id", "site_name", "enabled", "columns": [...]}` where each
        column is `{"column_id", "name", "enabled", "schedule"}`; missing
        `enabled` flags default to `True`. A file that cannot be loaded, or
        whose structure is not mapping / list-of-mappings, yields
        `{"file": <name>, "error": <message>}` instead. Returns `[]` when
        `root` is not a directory.
    """
    root = Path(root or DEFAULT_CONFIG_DIR)
    out: list[dict] = []
    if not root.is_dir():
        return out
    for path in sorted(root.iterdir()):
        if path.suffix.lower() not in (".yaml", ".yml"):
            continue
        try:
            with open(path, "r", encoding="utf-8") as f:
                data = _yaml.load(f) or {}
        except Exception as e:
            # Deliberately broad: any unreadable/unparseable file is reported
            # as an entry rather than aborting the whole listing.
            out.append({"file": path.name, "error": str(e)})
            continue

        # A file can parse fine yet have the wrong shape; report that the same
        # way as a load failure instead of letting `.get` raise and abort the
        # listing for every other site.
        if not isinstance(data, dict):
            out.append(
                {
                    "file": path.name,
                    "error": f"top-level YAML node is {type(data).__name__}, expected a mapping",
                }
            )
            continue
        columns = data.get("columns") or []
        if not isinstance(columns, list):
            out.append(
                {
                    "file": path.name,
                    "error": f"`columns` is {type(columns).__name__}, expected a list",
                }
            )
            continue
        bad_index = next(
            (i for i, c in enumerate(columns) if not isinstance(c, dict)), None
        )
        if bad_index is not None:
            out.append(
                {
                    "file": path.name,
                    "error": f"columns[{bad_index}] is not a mapping",
                }
            )
            continue

        out.append(
            {
                "site_id": data.get("site_id", path.stem),
                "site_name": data.get("site_name", ""),
                "enabled": bool(data.get("enabled", True)),
                "columns": [
                    {
                        "column_id": c.get("column_id"),
                        "name": c.get("name", ""),
                        "enabled": bool(c.get("enabled", True)),
                        "schedule": c.get("schedule", ""),
                    }
                    for c in columns
                ],
            }
        )
    return out
