"""Page-level checkpoint for big targets.

Revision ID: 0009_add_target_checkpoint
Revises: 0008_add_admin_audit_log
Create Date: 2026-04-29

Two columns:
  • crawl_target.track_checkpoint  bool, default False
      opt-in flag set per-target. When True, crawl_target() records
      progress in crawl_job.last_completed_page after each list page is
      fully processed, and a restart resumes from page+1.
  • crawl_job.last_completed_page  int, default 0
      page index just completed. 0 = nothing completed yet (default).
      operator can PATCH this manually to force a different resume point.

Default off so existing targets keep current behavior. Only enabled when
ops opt in for big WAF-sensitive targets (gd_wjk__qbwj, gd_gkmlpt full
backfills) where the 50-min list re-walk after a restart is expensive.
"""
from alembic import op
import sqlalchemy as sa


# Revision identifiers for this migration script. The values mirror the
# "Revision ID" / "Revises" lines in the module docstring above.
revision = "0009_add_target_checkpoint"
# Parent revision: this migration applies on top of the admin audit log one.
down_revision = "0008_add_admin_audit_log"
# No branch labels and no cross-branch dependencies are declared.
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add the two checkpoint columns.

    * ``crawl_target.track_checkpoint`` -- boolean, NOT NULL, server
      default ``false``. Per-target opt-in flag; off unless enabled.
    * ``crawl_job.last_completed_page`` -- integer, NOT NULL, server
      default ``0``. Index of the last fully processed list page, where
      ``0`` means nothing has been completed yet.

    Both columns carry a server-side default, so rows that already exist
    receive a value and the NOT NULL constraint can be applied directly.
    """
    # Opt-in switch on the target table.
    checkpoint_flag = sa.Column(
        "track_checkpoint",
        sa.Boolean,
        nullable=False,
        server_default=sa.text("false"),
    )
    op.add_column("crawl_target", checkpoint_flag)

    # Resume marker on the job table.
    resume_marker = sa.Column(
        "last_completed_page",
        sa.Integer,
        nullable=False,
        server_default=sa.text("0"),
    )
    op.add_column("crawl_job", resume_marker)


def downgrade() -> None:
    """Remove the checkpoint columns, undoing ``upgrade()`` in reverse order.

    Any data stored in the dropped columns is discarded.
    """
    # Listed in drop order: the job column first, then the target flag.
    columns_to_drop = (
        ("crawl_job", "last_completed_page"),
        ("crawl_target", "track_checkpoint"),
    )
    for table_name, column_name in columns_to_drop:
        op.drop_column(table_name, column_name)
