"""Durable job queue table.

Revision ID: 0007_add_crawl_job
Revises: 0006_add_site_schedule_cron
Create Date: 2026-04-29

The TaskQueue's JobInfo previously lived in process memory only, so every
queued or running job was lost on container restart. For full-history
backfills and long batch runs this meant losing work. This migration adds
the `crawl_job` table, which mirrors the in-memory state machine
(queued/running/done/failed/cancelled).

Boot sequence after this lands (sketched in SQL below):
  1. on API startup, orphaned `status='running'` rows are flipped to
     `failed` with `error_msg='restart_during_run'`
  2. surviving `status='queued'` rows are re-pushed into the in-memory FIFO
     in their original enqueue order
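
A sketch of that recovery pass in plain SQL (illustrative only; the real
startup hook lives in the API service, not in this migration):

    UPDATE crawl_job
       SET status = 'failed',
           error_msg = 'restart_during_run'
     WHERE status = 'running';

    SELECT job_id FROM crawl_job
     WHERE status = 'queued'
     ORDER BY enqueued_at;  -- served by ix_crawl_job_status_enqueued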

The remaining statuses (done/failed/cancelled) are terminal; those rows are
kept for the UI history pane, which is no longer capped by the old in-memory
HISTORY_KEEP=200 limit.
"""
from alembic import op
import sqlalchemy as sa


revision = "0007_add_crawl_job"
down_revision = "0006_add_site_schedule_cron"
branch_labels = None
depends_on = None


def upgrade() -> None:
    op.create_table(
        "crawl_job",
        sa.Column("job_id", sa.String(32), primary_key=True),
        sa.Column("host", sa.String(255), nullable=False),
        sa.Column("site_code", sa.String(64), nullable=False),
        sa.Column("target_code", sa.String(128), nullable=False),
        sa.Column("source", sa.String(20), nullable=False),
        sa.Column("status", sa.String(20), nullable=False,
                  server_default=sa.text("'queued'")),
        sa.Column("force", sa.Boolean, server_default=sa.text("false")),
        sa.Column("stop_requested", sa.Boolean, server_default=sa.text("false")),
        sa.Column("attempt_count", sa.Integer, server_default=sa.text("0")),
        sa.Column("enqueued_at", sa.DateTime, server_default=sa.func.now()),
        sa.Column("started_at", sa.DateTime),
        sa.Column("finished_at", sa.DateTime),
        sa.Column("error_msg", sa.Text),
        sa.Column("result_json", sa.JSON),
    )
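    # Single-column indexes for the lookup filters the UI history pane is
    # expected to use (host / site / target / status).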
    op.create_index("ix_crawl_job_host", "crawl_job", ["host"])
    op.create_index("ix_crawl_job_site_code", "crawl_job", ["site_code"])
    op.create_index("ix_crawl_job_target_code", "crawl_job", ["target_code"])
    op.create_index("ix_crawl_job_status", "crawl_job", ["status"])
    op.create_index(
        "ix_crawl_job_status_enqueued",
        "crawl_job",
        ["status", "enqueued_at"],
    )


def downgrade() -> None:
    op.drop_index("ix_crawl_job_status_enqueued", table_name="crawl_job")
    op.drop_index("ix_crawl_job_status", table_name="crawl_job")
    op.drop_index("ix_crawl_job_target_code", table_name="crawl_job")
    op.drop_index("ix_crawl_job_site_code", table_name="crawl_job")
    op.drop_index("ix_crawl_job_host", table_name="crawl_job")
    op.drop_table("crawl_job")
