"""Hard-coded selectors for gdqy.gov.cn's 市政府文件 (szfwj) column.
Phase 2 will migrate to YAML; for Phase 1 PoC we accept hardcoding per CONTEXT.md LOCKED.
"""
from __future__ import annotations

# --- Site identity ---------------------------------------------------------
SITE_ID: str = "gdqy"
SITE_NAME: str = "清远市人民政府"  # Qingyuan Municipal People's Government
SITE_BASE_URL: str = "https://www.gdqy.gov.cn"

# --- Column (listing section) crawled on that site -------------------------
COLUMN_ID: str = "szfwj"
COLUMN_NAME: str = "市政府文件"  # "Municipal Government Documents"
COLUMN_CATEGORY: str = "法规文件"  # "Regulations and Documents"
# Listing page of the column, expressed relative to the site root.
COLUMN_LIST_URL: str = f"{SITE_BASE_URL}/gdqy/newxxgk/fgwj/szfwj/"

# --- Known test article (no attachments) -----------------------------------
TARGET_ARTICLE_KEY: str = "post_2136593"
# The article page sits below the column listing as "content/<key>.html".
TARGET_ARTICLE_URL: str = f"{COLUMN_LIST_URL}content/{TARGET_ARTICLE_KEY}.html"

# List-page selectors, written as parsel CSS expressions.
# If the live gdqy list markup turns out to differ from the candidates below,
# the Phase 1 implementer has to update them (this is the only manual
# adjustment expected during the PoC).
LIST_SELECTORS: dict[str, str] = dict(
    # Several candidate row containers, covering templates commonly used by
    # government sites.
    row=", ".join(("ul.news_list li", "ul.list_news li", "div.list li")),
    href="a::attr(href)",
    title="a::text",
    date=", ".join(("span.date::text", "span.time::text", "em::text")),
)

# File extensions that mark a link on a detail page as an attachment.
_ATTACHMENT_EXTENSIONS: tuple[str, ...] = (
    "pdf", "doc", "docx", "xls", "xlsx", "zip",
)

# Detail-page selectors, written as parsel CSS expressions. Each value lists
# comma-separated candidate selectors.
DETAIL_SELECTORS: dict[str, str] = {
    "title": "h1.article-title::text, h1::text, div.article-title::text",
    "publish_time": "span.time::text, div.info span::text, em.time::text",
    "source": "span.source::text, div.info em::text",
    "content": "div.article-content, div.content, div.TRS_Editor, div.article",
    # Attachment links. For every extension two forms are matched: an href
    # that ends with ".<ext>", and an href that contains ".<ext>?" (the
    # extension followed by a query string). Both groups are generated from
    # the same tuple so they cannot drift apart.
    "attachment_css": ", ".join(
        [f"a[href$='.{ext}']" for ext in _ATTACHMENT_EXTENSIONS]
        + [f"a[href*='.{ext}?']" for ext in _ATTACHMENT_EXTENSIONS]
    ),
}

# Backward-compatible SZFWJ_* names kept for Plan 03. Every alias is bound to
# the same object as the canonical constant it mirrors.
SZFWJ_SITE_ID = SITE_ID
SZFWJ_COLUMN_ID = COLUMN_ID
SZFWJ_LIST_URL = COLUMN_LIST_URL
SZFWJ_LIST_SELECTORS = LIST_SELECTORS
# SZFWJ_SELECTORS is the legacy spelling of SZFWJ_DETAIL_SELECTORS.
SZFWJ_SELECTORS = SZFWJ_DETAIL_SELECTORS = DETAIL_SELECTORS
SZFWJ_ATTACHMENT_CSS = SZFWJ_DETAIL_SELECTORS["attachment_css"]

# Optional: URL and key of an article that has attachments, used by the
# STORE-03/05 secondary smoke test.
# While these stay None, the attachment path is covered only by
# tests/test_pipeline_mocked.py (httpx mocked).
ATTACHMENT_TEST_ARTICLE_URL: str | None = None
ATTACHMENT_TEST_ARTICLE_KEY: str | None = None
