"""Initial schema: sites, columns, articles, attachments, crawl_logs

Revision ID: 0001_initial
Revises:
Create Date: 2026-04-22

"""
from alembic import op
import sqlalchemy as sa

# Alembic revision identifiers — consumed by the migration version graph.
revision = "0001_initial"
down_revision = None  # first migration in the chain: nothing to revise
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Create the initial schema: sites, columns, articles, attachments, crawl_logs.

    Tables are created parent-first so the attachments -> articles foreign
    key resolves; each table's indexes are created immediately after it.
    """
    # --- sites: one row per crawled site, identified by a unique site_id ---
    op.create_table(
        "sites",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("site_id", sa.String(64), nullable=False),
        sa.Column("name", sa.Text),
        sa.Column("base_url", sa.Text),
        sa.Column("config_json", sa.JSON),
        # server_default is DB-side SQL text, so new rows default to enabled
        # even when inserted outside the ORM.
        sa.Column("enabled", sa.Boolean, server_default=sa.text("true")),
        sa.Column("created_at", sa.DateTime, server_default=sa.func.now()),
    )
    # Unique: site_id is the natural key for a site.
    op.create_index("ix_sites_site_id", "sites", ["site_id"], unique=True)

    # --- columns: per-site sections/feeds; natural key is (site_id, column_id) ---
    # NOTE(review): site_id here is the String(64) business key, not an FK to
    # sites.id — linkage is by value only (no DB-level constraint).
    op.create_table(
        "columns",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("site_id", sa.String(64), nullable=False),
        sa.Column("column_id", sa.String(64), nullable=False),
        sa.Column("name", sa.Text),
        sa.Column("category", sa.String(64)),
        sa.Column("list_url", sa.Text),
        sa.Column("config_json", sa.JSON),
        # Crawl bookkeeping: when this column was last visited, and the
        # publish time of the newest article seen there.
        sa.Column("last_crawled_at", sa.DateTime),
        sa.Column("last_article_time", sa.DateTime),
        sa.Column("enabled", sa.Boolean, server_default=sa.text("true")),
    )
    # Unique composite: a column_id may repeat across sites but not within one.
    op.create_index(
        "ix_columns_site_col", "columns", ["site_id", "column_id"], unique=True
    )

    # --- articles: one row per fetched page ---
    op.create_table(
        "articles",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("site_id", sa.String(64), nullable=False),
        sa.Column("column_id", sa.String(64), nullable=False),
        sa.Column("category", sa.String(64)),
        sa.Column("url", sa.Text, nullable=False),
        # Fixed-width hash columns: url_hash dedupes by URL (unique index
        # below); content_simhash supports near-duplicate content detection.
        sa.Column("url_hash", sa.CHAR(64), nullable=False),
        sa.Column("content_simhash", sa.CHAR(16)),
        sa.Column("title", sa.Text),
        sa.Column("publish_time", sa.DateTime),
        sa.Column("source", sa.String(128)),
        sa.Column("content_text", sa.Text),
        # Paths to on-disk artifacts; bulky payloads live outside the DB.
        sa.Column("raw_html_path", sa.Text),
        sa.Column("text_path", sa.Text),
        sa.Column("has_attachment", sa.Boolean, server_default=sa.text("false")),
        # Pipeline state; new rows start as "raw".
        sa.Column("status", sa.String(16), server_default="raw"),
        sa.Column("fetch_strategy", sa.String(32)),
        sa.Column("fetched_at", sa.DateTime, server_default=sa.func.now()),
        # Set when the article has been exported downstream; indexed below so
        # un-exported rows can be found quickly.
        sa.Column("exported_to_rag_at", sa.DateTime),
    )
    # Unique on url_hash: the dedup key for fetched URLs.
    op.create_index(
        "ix_articles_url_hash", "articles", ["url_hash"], unique=True
    )
    # Composite index for listing a column's articles by publish time.
    op.create_index(
        "ix_articles_site_col_pub",
        "articles",
        ["site_id", "column_id", "publish_time"],
    )
    op.create_index("ix_articles_exported", "articles", ["exported_to_rag_at"])

    # --- attachments: files belonging to an article (only real FK in schema) ---
    op.create_table(
        "attachments",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column(
            "article_id",
            sa.BigInteger,
            # CASCADE: deleting an article removes its attachments rows.
            sa.ForeignKey("articles.id", ondelete="CASCADE"),
            nullable=False,
        ),
        sa.Column("file_name", sa.Text),
        sa.Column("file_ext", sa.String(16)),
        sa.Column("size_bytes", sa.BigInteger),
        sa.Column("file_path", sa.Text),
        sa.Column("file_hash", sa.CHAR(64)),
        sa.Column("downloaded_at", sa.DateTime, server_default=sa.func.now()),
    )
    # Non-unique: lets duplicate downloads be found by content hash.
    op.create_index("ix_attachments_file_hash", "attachments", ["file_hash"])

    # --- crawl_logs: append-only per-fetch audit trail (no FKs, no indexes) ---
    op.create_table(
        "crawl_logs",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("site_id", sa.String(64)),
        sa.Column("column_id", sa.String(64)),
        sa.Column("article_url", sa.Text),
        sa.Column("strategy", sa.String(32)),
        sa.Column("http_status", sa.Integer),
        sa.Column("duration_ms", sa.Integer),
        sa.Column("success", sa.Boolean),
        sa.Column("error_msg", sa.Text),
        sa.Column("occurred_at", sa.DateTime, server_default=sa.func.now()),
    )


def downgrade() -> None:
    """Tear down everything created by upgrade().

    Tables are dropped child-first (reverse dependency order) so the
    attachments -> articles foreign key never blocks a drop; each table's
    indexes are removed just before the table itself.
    """
    # (table, indexes-on-that-table), in reverse creation order.
    teardown = (
        ("crawl_logs", ()),
        ("attachments", ("ix_attachments_file_hash",)),
        (
            "articles",
            (
                "ix_articles_exported",
                "ix_articles_site_col_pub",
                "ix_articles_url_hash",
            ),
        ),
        ("columns", ("ix_columns_site_col",)),
        ("sites", ("ix_sites_site_id",)),
    )
    for table, index_names in teardown:
        for index_name in index_names:
            op.drop_index(index_name, table_name=table)
        op.drop_table(table)
