"""SQLAlchemy 2.0 ORM — projection of the canonical contract in
`govcrawler.adapters.contract`.

Schema layout follows §5 of the 2.0 design doc:

    crawl_site (domain-level, 9 Qingyuan sites today)
        └── site_department (dept_path-level; Qingyuan cluster increment)
                └── crawl_target (column_id-level)
                        └── article / crawl_log
    local_department (OA snapshot, independent table)
    attachment (child of article)
    master_column_registry (read-only view over adapter classify calls)

Design guarantees enforced at the DB layer (matches contract.py one-to-one):
- **`dept_binding` on site_department** is a string enum with CHECK constraint:
  `pending / mapped / city_level / cross_dept / external_ref`
- **`crawl_target.site_department_id`** is nullable —走 YAML 站 / 外部参考站 leave
  it empty; 走适配器站 must point to a site_department row
- **`article.native_post_id`** is nullable; UNIQUE index is conditional so
  rows without a native id don't collide. On PG the partial index is native;
  MySQL would need a sentinel — we stay PG-friendly here
- **No `content_simhash`** — §5.5 (old §5.4) explicitly excluded it; RAG owns
  content de-dup. Adapter contract rejects it too.
"""
from __future__ import annotations

from datetime import date, datetime
from typing import Any

from sqlalchemy import (
    BigInteger,
    Boolean,
    CHAR,
    CheckConstraint,
    Date,
    DateTime,
    ForeignKey,
    Index,
    Integer,
    JSON,
    String,
    Text,
    UniqueConstraint,
    column,
    func,
    text,
)
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship

# Autoincrement-friendly BigInt PK:
# - PG/MySQL get BIGINT (full range)
# - SQLite gets INTEGER (aliases rowid → autoincrement "just works" in tests)
_PK_BIG = BigInteger().with_variant(Integer(), "sqlite")


class Base(DeclarativeBase):
    pass


# ---------------------------------------------------------------------------
# §5.1 crawl_site — domain-level
# ---------------------------------------------------------------------------
class CrawlSite(Base):
    __tablename__ = "crawl_site"

    id: Mapped[int] = mapped_column(_PK_BIG, primary_key=True, autoincrement=True)
    site_code: Mapped[str] = mapped_column(String(50), unique=True, nullable=False)
    site_name: Mapped[str | None] = mapped_column(String(200))
    base_url: Mapped[str | None] = mapped_column(String(500))
    site_role: Mapped[str | None] = mapped_column(String(50))  # qingyuan_local / county_local / province_ref / nation_ref
    cms_adapter: Mapped[str | None] = mapped_column(String(50))  # Qingyuan-增强: gkmlpt / trs_wcm / ...
    adapter_params_json: Mapped[dict[str, Any] | None] = mapped_column(JSON)
    default_fetch_strategy: Mapped[str | None] = mapped_column(String(50))  # httpx / playwright
    strategy_override_reason: Mapped[str | None] = mapped_column(String(200))
    respect_robots: Mapped[bool] = mapped_column(Boolean, server_default=text("true"))
    yaml_path: Mapped[str | None] = mapped_column(String(500))
    # §5.1 [2.0-增补] managed_by: 'yaml' (默认，sync_dir 托管) 或 'ui' (管理后台
    # 托管，sync_dir 永不改动)。允许运维混合两种配置源：YAML 行仍走铁律，
    # UI 行直接在 DB 上增删改。
    managed_by: Mapped[str] = mapped_column(String(10), server_default="yaml", nullable=False)
    enabled: Mapped[bool] = mapped_column(Boolean, server_default=text("true"))
    remark: Mapped[str | None] = mapped_column(String(500))
    created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
    updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())

    # Relationships
    site_departments: Mapped[list["SiteDepartment"]] = relationship(
        back_populates="site", cascade="all"
    )
    targets: Mapped[list["CrawlTarget"]] = relationship(
        back_populates="site", cascade="all"
    )

    __table_args__ = (
        # §5.1 约束：cms_adapter 与 yaml_path 恰好一个非空。基线默认走 yaml_path。
        CheckConstraint(
            "(cms_adapter IS NULL) <> (yaml_path IS NULL)",
            name="ck_crawl_site_adapter_xor_yaml",
        ),
        CheckConstraint(
            "managed_by IN ('yaml','ui')",
            name="ck_crawl_site_managed_by",
        ),
    )


# ---------------------------------------------------------------------------
# §5.2 site_department — dept_path level [Qingyuan 专项增强]
# ---------------------------------------------------------------------------
DEPT_BINDINGS: tuple[str, ...] = (
    "pending",
    "mapped",
    "city_level",
    "cross_dept",
    "external_ref",
)


class SiteDepartment(Base):
    __tablename__ = "site_department"

    id: Mapped[int] = mapped_column(_PK_BIG, primary_key=True, autoincrement=True)
    site_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey("crawl_site.id", ondelete="CASCADE"), nullable=False
    )
    dept_path: Mapped[str] = mapped_column(String(100), nullable=False)
    local_dept_id: Mapped[int | None] = mapped_column(
        BigInteger, ForeignKey("local_department.dept_id", ondelete="SET NULL")
    )
    dept_binding: Mapped[str] = mapped_column(
        String(20), nullable=False, server_default="pending"
    )
    dept_display_name: Mapped[str | None] = mapped_column(String(200))
    detect_status: Mapped[str] = mapped_column(String(20), server_default="pending")  # pending / ok / failed
    last_probed_at: Mapped[datetime | None] = mapped_column(DateTime)
    last_classify_json: Mapped[dict[str, Any] | None] = mapped_column(JSON)
    enabled: Mapped[bool] = mapped_column(Boolean, server_default=text("true"))
    created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
    updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())

    site: Mapped[CrawlSite] = relationship(back_populates="site_departments")
    local_dept: Mapped["LocalDepartment | None"] = relationship()
    targets: Mapped[list["CrawlTarget"]] = relationship(back_populates="site_department")

    __table_args__ = (
        UniqueConstraint("site_id", "dept_path", name="uq_site_department_path"),
        CheckConstraint(
            "dept_binding IN ('pending','mapped','city_level','cross_dept','external_ref')",
            name="ck_site_department_binding",
        ),
        # mapped ⇒ local_dept_id 必填；其他四种 ⇒ local_dept_id 必须为空
        CheckConstraint(
            "(dept_binding = 'mapped' AND local_dept_id IS NOT NULL) OR "
            "(dept_binding <> 'mapped' AND local_dept_id IS NULL)",
            name="ck_site_department_binding_local_dept",
        ),
    )


# ---------------------------------------------------------------------------
# §5.3 crawl_target — column_id level
# ---------------------------------------------------------------------------
class CrawlTarget(Base):
    __tablename__ = "crawl_target"

    id: Mapped[int] = mapped_column(_PK_BIG, primary_key=True, autoincrement=True)
    site_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey("crawl_site.id", ondelete="CASCADE"), nullable=False
    )
    site_department_id: Mapped[int | None] = mapped_column(
        BigInteger, ForeignKey("site_department.id", ondelete="CASCADE")
    )
    target_code: Mapped[str] = mapped_column(String(100), unique=True, nullable=False)
    target_name: Mapped[str | None] = mapped_column(String(200))
    entry_url: Mapped[str | None] = mapped_column(String(1000))
    sample_article_url: Mapped[str | None] = mapped_column(String(1000))
    dept_id: Mapped[int | None] = mapped_column(
        BigInteger, ForeignKey("local_department.dept_id", ondelete="SET NULL")
    )
    parser_override_json: Mapped[dict[str, Any] | None] = mapped_column(JSON)
    channel_name: Mapped[str | None] = mapped_column(String(200))
    channel_path: Mapped[str | None] = mapped_column(String(1000))
    content_category: Mapped[str | None] = mapped_column(String(100))
    content_subcategory: Mapped[str | None] = mapped_column(String(100))
    schedule_cron: Mapped[str | None] = mapped_column(String(100))
    expected_cadence_days: Mapped[int] = mapped_column(Integer, server_default="30")
    interval_sec: Mapped[int | None] = mapped_column(Integer)
    enabled: Mapped[bool] = mapped_column(Boolean, server_default=text("true"))
    last_crawled_at: Mapped[datetime | None] = mapped_column(DateTime)
    last_article_time: Mapped[datetime | None] = mapped_column(DateTime)
    created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
    updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())

    site: Mapped[CrawlSite] = relationship(back_populates="targets")
    site_department: Mapped[SiteDepartment | None] = relationship(back_populates="targets")
    local_dept: Mapped["LocalDepartment | None"] = relationship()


# ---------------------------------------------------------------------------
# §5.4 local_department — OA snapshot
# ---------------------------------------------------------------------------
class LocalDepartment(Base):
    __tablename__ = "local_department"

    dept_id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    dept_name: Mapped[str | None] = mapped_column(String(250))
    parent_dept_id: Mapped[int | None] = mapped_column(BigInteger)
    dept_level: Mapped[int | None] = mapped_column(Integer)
    dept_code: Mapped[str | None] = mapped_column(String(250))
    short_name: Mapped[str | None] = mapped_column(String(50))
    full_name: Mapped[str | None] = mapped_column(String(500))
    state: Mapped[int | None] = mapped_column(Integer)
    order_id: Mapped[int | None] = mapped_column(Integer)
    updated_at: Mapped[datetime | None] = mapped_column(DateTime)
    region: Mapped[str | None] = mapped_column(String(50), index=True)


# ---------------------------------------------------------------------------
# §5.5 article
# ---------------------------------------------------------------------------
class Article(Base):
    __tablename__ = "article"

    id: Mapped[int] = mapped_column(_PK_BIG, primary_key=True, autoincrement=True)
    site_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey("crawl_site.id", ondelete="RESTRICT"), nullable=False
    )
    target_id: Mapped[int | None] = mapped_column(
        BigInteger, ForeignKey("crawl_target.id", ondelete="SET NULL")
    )
    dept_id: Mapped[int | None] = mapped_column(
        BigInteger, ForeignKey("local_department.dept_id", ondelete="SET NULL")
    )
    native_post_id: Mapped[str | None] = mapped_column(String(64))
    url: Mapped[str] = mapped_column(Text, nullable=False)
    url_hash: Mapped[str] = mapped_column(CHAR(64), nullable=False)
    title: Mapped[str | None] = mapped_column(Text)
    publish_time: Mapped[datetime | None] = mapped_column(DateTime)
    source_raw: Mapped[str | None] = mapped_column(String(500))
    channel_name: Mapped[str | None] = mapped_column(String(200))
    channel_path: Mapped[str | None] = mapped_column(String(1000))
    content_category: Mapped[str | None] = mapped_column(String(100))
    content_subcategory: Mapped[str | None] = mapped_column(String(100))
    index_no: Mapped[str | None] = mapped_column(String(200))
    doc_no: Mapped[str | None] = mapped_column(String(200))
    publisher: Mapped[str | None] = mapped_column(String(500))
    publish_date: Mapped[date | None] = mapped_column(Date)
    effective_date: Mapped[date | None] = mapped_column(Date)
    topic_words: Mapped[str | None] = mapped_column(String(500))
    open_category: Mapped[str | None] = mapped_column(String(200))
    metadata_json: Mapped[dict[str, Any] | None] = mapped_column(JSON)
    content_text: Mapped[str | None] = mapped_column(Text)
    raw_html_path: Mapped[str | None] = mapped_column(Text)
    text_path: Mapped[str | None] = mapped_column(Text)
    has_attachment: Mapped[bool] = mapped_column(Boolean, server_default=text("false"))
    status: Mapped[str] = mapped_column(String(20), server_default="raw")  # raw / ready / failed
    fetch_strategy: Mapped[str | None] = mapped_column(String(50))
    fetched_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
    exported_to_rag_at: Mapped[datetime | None] = mapped_column(DateTime)
    created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
    updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())

    attachments: Mapped[list["Attachment"]] = relationship(
        back_populates="article", cascade="all, delete-orphan"
    )
    site: Mapped[CrawlSite] = relationship(foreign_keys=[site_id])
    target: Mapped["CrawlTarget | None"] = relationship(foreign_keys=[target_id])

    __table_args__ = (
        # §5.5 去重三条铁律。
        # 1) url_hash 永远 NOT NULL + UNIQUE（DB 级兜底，所有行生效）
        UniqueConstraint("url_hash", name="uq_article_url_hash"),
        # 2) (site_id, native_post_id) 当 native_post_id 非空时唯一 —— PG 以 partial index 表达
        Index(
            "uq_article_site_native",
            "site_id",
            "native_post_id",
            unique=True,
            postgresql_where=column("native_post_id").isnot(None),
        ),
        Index("ix_article_site_pub", "site_id", "publish_time"),
        Index("ix_article_target_pub", "target_id", "publish_time"),
        Index("ix_article_exported", "exported_to_rag_at"),
        Index("ix_article_status", "status"),
        CheckConstraint(
            "status IN ('raw','ready','failed')",
            name="ck_article_status",
        ),
    )


# ---------------------------------------------------------------------------
# §5.6 attachment
# ---------------------------------------------------------------------------
class Attachment(Base):
    __tablename__ = "attachment"

    id: Mapped[int] = mapped_column(_PK_BIG, primary_key=True, autoincrement=True)
    article_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey("article.id", ondelete="CASCADE"), nullable=False
    )
    file_name: Mapped[str | None] = mapped_column(Text)
    file_ext: Mapped[str | None] = mapped_column(String(20))
    size_bytes: Mapped[int | None] = mapped_column(BigInteger)
    file_path: Mapped[str | None] = mapped_column(Text)
    file_hash: Mapped[str | None] = mapped_column(CHAR(64))
    downloaded_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())

    article: Mapped[Article] = relationship(back_populates="attachments")

    __table_args__ = (Index("ix_attachment_file_hash", "file_hash"),)


# ---------------------------------------------------------------------------
# §5.7 crawl_log
# ---------------------------------------------------------------------------
class CrawlLog(Base):
    __tablename__ = "crawl_log"

    id: Mapped[int] = mapped_column(_PK_BIG, primary_key=True, autoincrement=True)
    site_id: Mapped[int | None] = mapped_column(
        BigInteger, ForeignKey("crawl_site.id", ondelete="SET NULL")
    )
    target_id: Mapped[int | None] = mapped_column(
        BigInteger, ForeignKey("crawl_target.id", ondelete="SET NULL")
    )
    article_url: Mapped[str | None] = mapped_column(Text)
    strategy: Mapped[str | None] = mapped_column(String(50))
    http_status: Mapped[int | None] = mapped_column(Integer)
    duration_ms: Mapped[int | None] = mapped_column(Integer)
    success: Mapped[bool] = mapped_column(Boolean, nullable=False)
    error_msg: Mapped[str | None] = mapped_column(Text)
    occurred_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())

    __table_args__ = (
        Index("ix_crawl_log_site_occurred", "site_id", "occurred_at"),
        Index("ix_crawl_log_target_occurred", "target_id", "occurred_at"),
    )


# ---------------------------------------------------------------------------
# §5.8 master_column_registry [Qingyuan 专项增强]
# ---------------------------------------------------------------------------
class MasterColumnRegistry(Base):
    __tablename__ = "master_column_registry"

    id: Mapped[int] = mapped_column(_PK_BIG, primary_key=True, autoincrement=True)
    adapter_id: Mapped[str] = mapped_column(String(50), nullable=False)
    site_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey("crawl_site.id", ondelete="CASCADE"), nullable=False
    )
    column_id: Mapped[str] = mapped_column(String(64), nullable=False)
    column_name: Mapped[str | None] = mapped_column(String(500))
    column_path: Mapped[str | None] = mapped_column(String(1000))
    admin_level: Mapped[str | None] = mapped_column(String(20))  # city / county / township
    topic_tags: Mapped[list[str] | None] = mapped_column(JSON)
    post_count: Mapped[int | None] = mapped_column(Integer)
    last_seen_at: Mapped[datetime | None] = mapped_column(DateTime)
    subscribed_target_id: Mapped[int | None] = mapped_column(
        BigInteger, ForeignKey("crawl_target.id", ondelete="SET NULL")
    )
    active: Mapped[bool] = mapped_column(Boolean, server_default=text("true"))
    created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
    updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())

    __table_args__ = (
        UniqueConstraint(
            "adapter_id", "site_id", "column_id", name="uq_mcr_adapter_site_col"
        ),
    )


__all__ = [
    "Base",
    "CrawlSite",
    "SiteDepartment",
    "CrawlTarget",
    "LocalDepartment",
    "Article",
    "Attachment",
    "CrawlLog",
    "MasterColumnRegistry",
    "DEPT_BINDINGS",
]
