from pathlib import Path

import pytest

from govcrawler.config import loader, registry


# Absolute path to the gdqy site definition: starting from the directory one
# level above this file's own directory, then config/sites/gdqy.yaml.
GDQY_YAML = Path(__file__).resolve().parents[1].joinpath(
    "config", "sites", "gdqy.yaml"
)


def test_load_gdqy_yaml_parses():
    """Load the gdqy YAML and spot-check site-level and column-level fields."""
    site = loader.load_site(GDQY_YAML)

    # Site-level fields.
    assert site.site_id == "gdqy"
    assert site.base_url.startswith("https://")
    assert site.default_strategy == "playwright"

    # Fields of the "szfwj" column.
    column = site.get_column("szfwj")
    assert column is not None
    assert "article-content" in column.detail.content
    assert column.pagination.type == "page_param"


def test_load_sites_dir_picks_gdqy():
    """Load the directory holding the gdqy YAML and check gdqy's first column."""
    loaded = loader.load_sites_dir(GDQY_YAML.parent)
    assert "gdqy" in loaded

    first_column = loaded["gdqy"].columns[0]
    assert first_column.column_id == "szfwj"


def test_invalid_strategy_rejected(tmp_path):
    """A site file whose ``default_strategy`` is ``banana`` must fail to load."""
    yaml_text = (
        "site_id: bogus\nsite_name: x\nbase_url: https://x\n"
        "default_strategy: banana\ncolumns: []\n"
    )
    config_path = tmp_path / "bogus.yaml"
    config_path.write_text(yaml_text, encoding="utf-8")

    # Only "some exception is raised" is asserted; the concrete type is not pinned.
    with pytest.raises(Exception):
        loader.load_site(config_path)


def test_filename_stem_must_match_site_id(tmp_path):
    """Loading a directory fails with ``ValueError`` when stem and site_id differ."""
    # File is named "wrong" while its content declares site_id "gdqy".
    mismatched = tmp_path / "wrong.yaml"
    yaml_text = "site_id: gdqy\nsite_name: x\nbase_url: https://x\ncolumns: []\n"
    mismatched.write_text(yaml_text, encoding="utf-8")

    with pytest.raises(ValueError):
        loader.load_sites_dir(tmp_path)


def test_registry_returns_yaml_selectors_for_gdqy():
    """Registry selectors for gdqy/szfwj match the hardcoded ones in sites/gdqy.py."""
    registry.reload()
    selectors = registry.get_detail_selectors("gdqy", "szfwj")
    assert selectors is not None

    # Parity check against the hardcoded selectors module.
    from govcrawler.sites.gdqy import DETAIL_SELECTORS

    for key in ("content", "title"):
        assert selectors[key] == DETAIL_SELECTORS[key]


def test_registry_returns_none_for_unknown_site():
    """Looking up selectors for a site the registry does not know yields ``None``."""
    registry.reload()
    result = registry.get_detail_selectors("unknown_site", "x")
    assert result is None


def test_registry_fallback_when_no_yaml(monkeypatch, tmp_path):
    """Selectors for gdqy are still returned when the config dir holds no YAML.

    The registry's ``DEFAULT_CONFIG_DIR`` is pointed at an empty temporary
    directory, so the gdqy YAML cannot be loaded from it; the lookup is
    expected to fall back to the legacy selectors in ``sites/gdqy.py``.
    """
    try:
        # Scope the patch to this block. The function-level ``monkeypatch``
        # fixture is only undone at teardown (after the test body returns), so
        # a trailing ``registry.reload()`` would otherwise run while
        # ``DEFAULT_CONFIG_DIR`` still points at the empty directory.
        with monkeypatch.context() as patcher:
            patcher.setattr(registry, "DEFAULT_CONFIG_DIR", tmp_path)
            registry.reload()
            sel = registry.get_detail_selectors("gdqy", "szfwj")
        assert sel is not None  # fallback to sites/gdqy.py
    finally:
        # The patch is undone by now: reload so later tests see the real
        # configuration, even if the assertion above failed.
        registry.reload()
