# syntax=docker/dockerfile:1.6
FROM python:3.11-slim AS base

# Python runtime behaviour: do not write .pyc files, and keep
# stdout/stderr unbuffered so log lines show up immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# uv settings: copy packages into the environment instead of linking them
# from the cache, and keep the project virtualenv at /opt/venv.
ENV UV_LINK_MODE=copy \
    UV_PROJECT_ENVIRONMENT=/opt/venv

# Put the virtualenv's executables ahead of the system ones.
ENV PATH="${UV_PROJECT_ENVIRONMENT}/bin:${PATH}"

# System deps for patchright/chromium + parsing libs (lxml).
#
# APT_MIRROR is the host that replaces deb.debian.org / security.debian.org
# in the apt sources. The default keeps the aliyun mirror (CN infra); build
# with `--build-arg APT_MIRROR=deb.debian.org` to stay on the upstream
# Debian servers.
#
# Chromium runtime libs are needed even for headless. libxfixes3 +
# libxshmfence1 are required by chrome-headless-shell-1208 — patchright
# install --with-deps would have brought them in, but we run with
# SKIP_CHROMIUM=1 (binary lives on the host bind mount), so they are added
# explicitly here. Without these the launch fails with
# 'libXfixes.so.3: cannot open shared object file'.
ARG APT_MIRROR=mirrors.aliyun.com
RUN sed -i "s|deb.debian.org|${APT_MIRROR}|g; s|security.debian.org|${APT_MIRROR}|g" /etc/apt/sources.list.d/debian.sources \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
      ca-certificates \
      curl \
      git \
      libxml2 \
      libxslt1.1 \
      unzip \
      fonts-noto-cjk \
      libnss3 \
      libatk1.0-0 \
      libatk-bridge2.0-0 \
      libcups2 \
      libdrm2 \
      libxkbcommon0 \
      libxcomposite1 \
      libxdamage1 \
      libxrandr2 \
      libgbm1 \
      libpango-1.0-0 \
      libcairo2 \
      libasound2 \
      libatspi2.0-0 \
      libxfixes3 \
      libxshmfence1 \
    && rm -rf /var/lib/apt/lists/*

# Install uv (fast resolver)
# NOTE(review): `:latest` is a floating tag, so rebuilds can pick up a
# different uv release — consider pinning to the version that produced uv.lock.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /usr/local/bin/

WORKDIR /app

# Dependency layer — cache-friendly: only the manifest and lockfile are
# copied, so this layer is rebuilt only when dependencies change.
COPY pyproject.toml uv.lock ./
# The cache mount keeps uv's download cache on the build host instead of
# baking it into the image layer; UV_LINK_MODE=copy (set above) makes uv copy
# packages into the venv rather than link into the mounted cache.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-install-project

# Install Chromium browser via patchright.
# Gated on the SKIP_CHROMIUM build arg so deployments behind firewalls that
# can't reach the Google Chromium CDN can skip it. PLAYWRIGHT_DOWNLOAD_HOST
# lets us use a Chinese mirror (npmmirror / aliyun) when behind GFW —
# the default points at the npmmirror-maintained playwright mirror so the
# build works in CN infra out of the box.
#
# The install is best-effort: it first tries with --with-deps, then without,
# and finally only logs a warning so the image still builds.
#
# `uv run --no-sync` runs patchright from the already-synced venv. Without it
# uv would re-sync the environment (including the project itself) before the
# source has been copied in, and a failure there would be hidden by the
# best-effort fallback below.
#
# --with-deps goes through apt, so the package lists are removed again in the
# same layer to keep them out of the image.
ARG SKIP_CHROMIUM=0
ARG PLAYWRIGHT_DOWNLOAD_HOST=https://cdn.npmmirror.com/binaries/playwright
RUN if [ "$SKIP_CHROMIUM" = "1" ]; then \
      echo ">>> SKIP_CHROMIUM=1 — chromium install skipped (playwright strategy disabled)"; \
    else \
      export PLAYWRIGHT_DOWNLOAD_HOST="$PLAYWRIGHT_DOWNLOAD_HOST" && \
      (timeout 900 uv run --no-sync patchright install chromium --with-deps \
        || timeout 900 uv run --no-sync patchright install chromium \
        || echo ">>> chromium install FAILED — continuing without it; playwright-strategy sites will error at runtime"); \
      rm -rf /var/lib/apt/lists/*; \
    fi

# App code
COPY . .
# Final sync installs the project itself on top of the dependency layer.
# The cache mount keeps uv's download cache out of the image layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen

# Default: run scheduler. Override in docker-compose for serve/alembic.
CMD ["python", "-m", "govcrawler", "schedule"]
