"""CMS adapter layer (§7.5 of 2.0 design doc).

Each adapter normalizes a CMS family (e.g. gkmlpt = 清远政务云) into the
canonical CrawlItem contract defined in `contract`.

Adapter internals vary freely (JSON APIs, HTML scraping, headless browser…),
but outputs must be the same Pydantic models so pipeline/RAG interfaces stay
stable. See `contract.py` for the stable boundary.
"""

from govcrawler.adapters import flk_npc, gkmlpt, gov_cn_policy, openstd_samr, xinhua_xjp
from govcrawler.adapters.contract import (
    AttachmentItem,
    ContractViolation,
    CrawlItem,
    CrawlLogItem,
    FetchStrategy,
    Status,
)

# Registry of CMS adapters. Each entry maps an adapter module's own
# `ADAPTER_ID` to the module itself; the key is the value stored in
# `crawl_site.cms_adapter`. Sites whose `cms_adapter IS NULL` (the legacy
# yaml_path-driven HTML scrape) never consult this mapping — they are
# served by the direct-scrape fetcher instead.
ADAPTERS = {
    module.ADAPTER_ID: module
    for module in (
        flk_npc,
        gkmlpt,
        gov_cn_policy,
        openstd_samr,
        xinhua_xjp,
    )
}


def get_adapter(name: str):
    """Look up the adapter registered under `name` in `ADAPTERS`.

    Returns the registered adapter for a known `name`. For an unknown
    `name`, raises KeyError whose message quotes the requested name and
    lists the registered adapter ids in sorted order.
    """
    if name not in ADAPTERS:
        # `from None` marks the error as context-free, so the traceback
        # shows only this message and no chained exception.
        raise KeyError(
            f"unknown cms_adapter {name!r}; registered: {sorted(ADAPTERS)}"
        ) from None
    return ADAPTERS[name]


# Public names of this package: the adapter registry, the names re-exported
# from `govcrawler.adapters.contract`, the `get_adapter` lookup helper, and
# the individual adapter modules.
__all__ = [
    "ADAPTERS",
    "AttachmentItem",
    "ContractViolation",
    "CrawlItem",
    "CrawlLogItem",
    "FetchStrategy",
    "Status",
    "get_adapter",
    "flk_npc",
    "gkmlpt",
    "gov_cn_policy",
    "openstd_samr",
    "xinhua_xjp",
]
