"""CMS adapter layer (§7.5 of 2.0 design doc).

Each adapter normalizes a CMS family (e.g. gkmlpt = the Qingyuan government
cloud platform) into the canonical CrawlItem contract defined in `contract`.

Adapter internals vary freely (JSON APIs, HTML scraping, headless browser…),
but outputs must be the same Pydantic models so pipeline/RAG interfaces stay
stable. See `contract.py` for the stable boundary.
"""

from govcrawler.adapters import gkmlpt, gov_cn_policy
from govcrawler.adapters.contract import (
    AttachmentItem,
    ContractViolation,
    CrawlItem,
    CrawlLogItem,
    FetchStrategy,
    Status,
)

# Registry of CMS adapters, indexed by each module's ADAPTER_ID — the value
# stored in `crawl_site.cms_adapter`. A site whose `cms_adapter IS NULL`
# (legacy yaml_path-driven HTML scrape) never consults this mapping; it is
# handled by the direct-scrape fetcher instead.
ADAPTERS = {
    adapter.ADAPTER_ID: adapter
    for adapter in (gkmlpt, gov_cn_policy)
}


def get_adapter(name: str):
    """Look up the adapter registered under `name` in ADAPTERS.

    Returns the registered adapter module. An unknown `name` raises a
    KeyError whose message lists every registered adapter id in sorted
    order.
    """
    try:
        adapter = ADAPTERS[name]
    except KeyError:
        known = sorted(ADAPTERS)
        message = f"unknown cms_adapter {name!r}; registered: {known}"
        # `from None` hides the bare lookup KeyError so only the
        # descriptive error shows up in the traceback.
        raise KeyError(message) from None
    else:
        return adapter


# Public API of this module: the adapter registry and its lookup helper, the
# two adapter modules themselves, and the contract types imported above.
# These are the names bound by a star-import of this module.
__all__ = [
    "ADAPTERS",
    "AttachmentItem",
    "ContractViolation",
    "CrawlItem",
    "CrawlLogItem",
    "FetchStrategy",
    "Status",
    "get_adapter",
    "gkmlpt",
    "gov_cn_policy",
]
