from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    # Application configuration loaded by pydantic-settings. Values may come
    # from the ".env" file configured below (read as UTF-8); keys that match
    # no field are ignored. Only db_url has no default.
    model_config = SettingsConfigDict(
        env_file=".env", env_file_encoding="utf-8", extra="ignore"
    )

    # --- Core -------------------------------------------------------------
    db_url: str
    data_dir: str = "./data/govcrawler"
    user_agent: str = "GovCrawlerBot/1.0 (contact: xxx@example.com)"

    # --- Cookie pool (FETCH-04) -------------------------------------------
    # Leave valkey_url empty/unset to fall back to the in-memory pool
    # (dev/tests).
    valkey_url: str = ""
    # Cookie lifetime: 4 hours, per design doc §4.2.
    cookie_ttl_s: int = 14_400

    # --- Alerting (OBS-03) ------------------------------------------------
    # An empty webhook URL disables alerting.
    alert_webhook_url: str = ""
    alert_provider: str = "feishu"  # "feishu" | "wechat"

    # --- Admin dashboard --------------------------------------------------
    # An empty admin_user turns authentication off (dev only). In prod,
    # provide ADMIN_USER / ADMIN_PASSWORD through .env.
    admin_user: str = ""
    admin_password: str = ""
    # Restricted operator account: limited to the standard attachment
    # downloader screen and the captcha-backed OpenSTD download endpoints.
    standard_downloader_user: str = ""
    standard_downloader_password: str = ""

    # --- RAG consumer API -------------------------------------------------
    # Bearer token for the RAG consumer. While empty, the RAG endpoints
    # (/api/articles**, /api/raw-html/**, /api/text/**,
    # /api/articles/.../attachments/..., /api/articles/.../ack) answer 401.
    # In prod, set RAG_API_TOKEN in .env to a long random string and share it
    # with the downstream RAG service.
    rag_api_token: str = ""

    # --- Legacy zm-rag ingest webhook -------------------------------------
    # Kept for older integrations; the Gov Public KB rag-export does not read
    # these RAG_* values.
    rag_ingest_url: str = ""
    rag_status_url: str = ""
    rag_ingest_token: str = ""

    # --- Government public KB (dedicated zm-rag endpoint) -----------------
    # Deliberately separate from the legacy RAG_* fields above.
    rag_gov_public_kb_ingest_url: str = ""
    rag_gov_public_kb_status_url: str = ""
    rag_gov_public_kb_ingest_token: str = ""

    # --- RAG export tuning ------------------------------------------------
    # NOTE(review): the *_s suffix presumably means seconds; exact semantics
    # live in the export code — confirm there.
    rag_export_batch_size: int = 50
    rag_export_timeout_s: float = 60.0
    rag_export_wait_completion: bool = True
    rag_export_poll_interval_s: float = 2.0
    rag_export_poll_timeout_s: float = 30 * 60.0
    rag_export_running_stale_s: float = 6 * 3600.0
    rag_export_after_crawl_enabled: bool = False

    # --- Attachments ------------------------------------------------------
    # Attachment downloads come second to article ingestion, so the timeout
    # stays short: a reset or slow PDF host must not hold up the article row
    # for a full minute.
    attachment_timeout_s: float = 15.0
    # Attachments get their own throttle cap instead of the article-detail
    # throttle. Some targets space detail requests 120-180s apart; repeating
    # that wait before a PDF would keep an already-fetched article out of the
    # DB for minutes.
    attachment_throttle_cap_s: float = 5.0

    # --- Guangdong office-hours pacing ------------------------------------
    # Guangdong provincial sites are WAF-sensitive during office hours. These
    # settings only stretch request spacing at runtime; DB/YAML values are
    # left untouched.
    gd_worktime_interval_multiplier: float = 1.2
    gd_worktime_start_hour: int = 8
    gd_worktime_end_hour: int = 18

    # --- Host cooldown ----------------------------------------------------
    # A target that triggers host_cooldown has its DB interval grown by 10%.
    # NOTE(review): cooldown_interval_max_sec presumably caps that growth —
    # confirm against the cooldown code.
    cooldown_interval_growth_factor: float = 1.1
    cooldown_interval_max_sec: int = 600


def get_settings() -> Settings:
    """Build and return a fresh ``Settings`` instance.

    The configuration is read anew on every call; the volume at this stage
    is small, so no ``lru_cache`` is applied.
    """
    settings = Settings()
    return settings
