"""Crawl logs — recent entries + retry individual URLs."""
from __future__ import annotations

import logging
from typing import Any

from fastapi import BackgroundTasks, Depends, HTTPException, Query
from sqlalchemy import desc, func, select
from sqlalchemy.orm import Session

from govcrawler.models import CrawlLog, CrawlSite, CrawlTarget
from govcrawler.repositories import targets as targets_repo

from ._common import _session, router

log = logging.getLogger(__name__)


def _logs_base_stmt():
    """Select CrawlLog + joined site_code/target_code for display."""
    return (
        select(
            CrawlLog,
            CrawlSite.site_code.label("site_code"),
            CrawlTarget.target_code.label("target_code"),
        )
        .select_from(CrawlLog)
        .join(CrawlSite, CrawlSite.id == CrawlLog.site_id, isouter=True)
        .join(CrawlTarget, CrawlTarget.id == CrawlLog.target_id, isouter=True)
    )
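
# Note: the outer joins in _logs_base_stmt() keep log rows whose parent
# site/target row no longer exists; site_code/target_code then come back as
# NULL. Assuming the default table names, the statement compiles to roughly:
#   SELECT crawl_log.*, crawl_site.site_code, crawl_target.target_code
#   FROM crawl_log
#   LEFT OUTER JOIN crawl_site ON crawl_site.id = crawl_log.site_id
#   LEFT OUTER JOIN crawl_target ON crawl_target.id = crawl_log.target_id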


@router.get("/api/logs")
def recent_logs(
    limit: int = Query(20, ge=1, le=200),
    offset: int = Query(0, ge=0),
    site: str | None = Query(None, description="crawl_site.site_code"),
    target: str | None = Query(None, description="crawl_target.target_code"),
    only_failed: bool = Query(False, description="return only failed crawl attempts"),
    s: Session = Depends(_session),
) -> dict[str, Any]:
    """Server-side pagination — UI requests one page at a time, total
    returned for accurate page count rendering."""
    filters = []
    if site:
        filters.append(CrawlSite.site_code == site)
    if target:
        filters.append(CrawlTarget.target_code == target)
    if only_failed:
        filters.append(CrawlLog.success.is_(False))

    # Count all matching rows (same joins and filters, no ordering/pagination).
    count_stmt = (
        select(func.count(CrawlLog.id))
        .select_from(CrawlLog)
        .join(CrawlSite, CrawlSite.id == CrawlLog.site_id, isouter=True)
        .join(CrawlTarget, CrawlTarget.id == CrawlLog.target_id, isouter=True)
    )
    for f in filters:
        count_stmt = count_stmt.where(f)
    total = s.execute(count_stmt).scalar() or 0

    # Newest first; apply filters, then paginate.
    stmt = _logs_base_stmt().order_by(desc(CrawlLog.id))
    for f in filters:
        stmt = stmt.where(f)
    stmt = stmt.offset(offset).limit(limit)
    rows = s.execute(stmt).all()
    return {
        "count": len(rows),
        "total": total,
        "limit": limit,
        "offset": offset,
        "items": [
            {
                "id": r.CrawlLog.id,
                "site_code": r.site_code,
                "target_code": r.target_code,
                "article_url": r.CrawlLog.article_url,
                "strategy": r.CrawlLog.strategy,
                "http_status": r.CrawlLog.http_status,
                "duration_ms": r.CrawlLog.duration_ms,
                "success": r.CrawlLog.success,
                "error_msg": r.CrawlLog.error_msg,
                "occurred_at": r.CrawlLog.occurred_at.isoformat() if r.CrawlLog.occurred_at else None,
            }
            for r in rows
        ],
    }
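
# Illustrative exchange (all values are placeholders):
#   GET /api/logs?limit=20&offset=40&site=<site_code>&only_failed=true
#   -> {"count": 20, "total": 312, "limit": 20, "offset": 40,
#       "items": [{"id": ..., "site_code": ..., "target_code": ...,
#                  "article_url": ..., "success": false, ...}, ...]}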


@router.post("/api/logs/{log_id}/retry")
def retry_failed(
    log_id: int,
    background: BackgroundTasks,
    s: Session = Depends(_session),
) -> dict[str, Any]:
    """Re-attempt a single article URL via the v2 pipeline."""
    cl = s.get(CrawlLog, log_id)
    if cl is None:
        raise HTTPException(404, "log not found")
    if not (cl.target_id and cl.article_url):
        raise HTTPException(400, "log is missing target/url")

    tgt = targets_repo.get_by_id(s, cl.target_id)
    if tgt is None:
        raise HTTPException(400, f"crawl_target {cl.target_id} no longer exists")

    try:
        from govcrawler.pipeline import fetch_and_store  # type: ignore
    except ImportError as e:
        raise HTTPException(501, f"pipeline.fetch_and_store not wired for v2: {e}") from e

    # Capture plain values up front so the background task does not depend on
    # the request-scoped ORM session.
    url = cl.article_url
    target_code = tgt.target_code

    def _go():
        try:
            fetch_and_store(target_code=target_code, url=url)
        except Exception:
            log.exception("retry failed target=%s url=%s", target_code, url)

    background.add_task(_go)
    return {"queued": True, "log_id": log_id, "article_url": url}
