# syntax=docker/dockerfile:1.6
FROM python:3.11-slim AS base

# Python runtime: do not write .pyc files, and keep stdout/stderr unbuffered
# so log lines show up immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# uv: copy packages into the environment instead of linking them, and keep
# the project environment at /opt/venv. Its bin/ directory is put first on
# PATH so tools installed there resolve without an explicit activation step.
ENV UV_LINK_MODE=copy \
    UV_PROJECT_ENVIRONMENT=/opt/venv \
    PATH=/opt/venv/bin:${PATH}

# System packages, one per line and sorted for easy diffing:
#   - ca-certificates, curl, git
#   - libxml2, libxslt1.1: shared libraries for the parsing libs (lxml)
#   - fonts-noto-cjk: CJK fonts
#   - all remaining lib* packages: Chromium runtime libraries for
#     patchright (needed even for headless)
# The apt package lists are removed in the same layer that fetched them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
      fonts-noto-cjk \
      git \
      libasound2 \
      libatk-bridge2.0-0 \
      libatk1.0-0 \
      libatspi2.0-0 \
      libcairo2 \
      libcups2 \
      libdrm2 \
      libgbm1 \
      libnss3 \
      libpango-1.0-0 \
      libxcomposite1 \
      libxdamage1 \
      libxkbcommon0 \
      libxml2 \
      libxrandr2 \
      libxslt1.1 \
    && rm -rf /var/lib/apt/lists/*

# uv (fast resolver): take the uv and uvx binaries straight from the
# upstream uv image and drop them into /usr/local/bin.
# NOTE(review): the `latest` tag is unpinned, so a rebuild can pick up a
# different uv release — consider pinning a version tag or digest that is
# known to work with this project's uv.lock.
COPY --from=ghcr.io/astral-sh/uv:latest \
     /uv /uvx \
     /usr/local/bin/

WORKDIR /app

# Dependency layer — cache-friendly: only the project manifest and lockfile
# are copied, so this layer is rebuilt only when one of them changes.
# --frozen installs exactly what uv.lock records without updating it, and
# --no-install-project leaves the project itself out because its source has
# not been copied yet.
#
# uv's download cache lives in a BuildKit cache mount: it is reused across
# builds but never written into an image layer (without the mount the cache
# under /root/.cache/uv would be stored in the image next to the installed
# copies). Requires BuildKit.
COPY pyproject.toml uv.lock ./
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-install-project

# Install the Chromium browser via patchright.
# Gated on the SKIP_CHROMIUM build arg so deployments behind firewalls that
# can't reach the Google Chromium CDN can skip it. PLAYWRIGHT_DOWNLOAD_HOST
# lets us use a Chinese mirror (npmmirror / aliyun) when behind the GFW;
# the default points at the npmmirror-maintained playwright mirror so the
# build works in CN infra out of the box.
#
# `uv run --no-sync` executes patchright from the environment that already
# exists. Plain `uv run` would first re-lock and re-sync the project — i.e.
# try to install the project itself before its source has been copied into
# the image — and any failure from that would be hidden by the best-effort
# fallback below, leaving the image without a browser.
#
# The install is best-effort: first with --with-deps, then without, each
# attempt capped at 900 seconds. If both fail the build continues and only
# prints a warning.
#
# --with-deps may refresh the apt package lists while installing OS
# packages (presumably via apt-get, as upstream playwright does), so they
# are removed again in the same layer.
ARG SKIP_CHROMIUM=0
ARG PLAYWRIGHT_DOWNLOAD_HOST=https://cdn.npmmirror.com/binaries/playwright
RUN if [ "$SKIP_CHROMIUM" = "1" ]; then \
      echo ">>> SKIP_CHROMIUM=1 — chromium install skipped (playwright strategy disabled)"; \
    else \
      export PLAYWRIGHT_DOWNLOAD_HOST="$PLAYWRIGHT_DOWNLOAD_HOST" && \
      (timeout 900 uv run --no-sync patchright install chromium --with-deps \
        || timeout 900 uv run --no-sync patchright install chromium \
        || echo ">>> chromium install FAILED — continuing without it; playwright-strategy sites will error at runtime"); \
      rm -rf /var/lib/apt/lists/*; \
    fi

# App code: copy the build context into the working directory, then sync
# again so the project itself is installed into the environment (--frozen
# uses uv.lock as-is without updating it).
#
# uv's cache is a BuildKit cache mount, so anything uv downloads or builds
# for this step stays out of the image layer. Requires BuildKit.
#
# NOTE(review): `COPY . .` copies the entire build context — make sure a
# .dockerignore excludes .git, local virtualenvs and secrets such as .env.
COPY . .
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen

# Default command: run the govcrawler scheduler. Override the command in
# docker-compose for the serve / alembic variants.
# Exec (JSON-array) form is used, so no shell wraps the process and it
# receives stop signals directly.
# NOTE(review): no USER instruction is visible in this file, so the
# container runs as root — consider switching to a non-root user.
CMD ["python", "-m", "govcrawler", "schedule"]
