"""Aggregate dashboard stats — 24h article counts, success rate, enablement."""
from __future__ import annotations

from datetime import datetime, timedelta
from typing import Any

from fastapi import Depends, Query
from sqlalchemy import and_, case, func, select
from sqlalchemy.orm import Session

from govcrawler.models import Article, CrawlLog, CrawlSite, CrawlTarget

from ._common import _session, router


@router.get("/api/stats")
def stats(s: Session = Depends(_session)) -> dict[str, Any]:
    """Aggregate dashboard counters.

    Returns article totals (all / ready / not yet exported to RAG),
    24h throughput, 24h fetch success rate derived from crawl_logs, and
    site/target enablement counts.  ``success_rate_24h`` is ``None`` when
    there were no fetch attempts in the window (avoids 0/0).
    """
    # NOTE(review): datetime.utcnow() is deprecated since 3.12; kept as-is
    # because the DB columns appear to hold naive UTC timestamps — confirm
    # before switching to timezone-aware datetimes.
    now = datetime.utcnow()
    last_24h = now - timedelta(hours=24)

    def count(stmt) -> int:
        """Run a single-value aggregate and coerce NULL to 0."""
        return int(s.execute(stmt).scalar_one() or 0)

    total_articles = count(select(func.count(Article.id)))
    ready_articles = count(
        select(func.count(Article.id)).where(Article.status == "ready")
    )
    unexported = count(
        select(func.count(Article.id)).where(
            and_(Article.status == "ready", Article.exported_to_rag_at.is_(None))
        )
    )
    # 24h throughput counts only *ready* articles here (the per-site /
    # per-target endpoints count all fetched rows regardless of status).
    articles_24h = count(
        select(func.count(Article.id)).where(
            and_(Article.status == "ready", Article.fetched_at >= last_24h)
        )
    )

    # 24h success rate via crawl_logs; SUM(CASE ...) tallies successful rows.
    total_logs = count(
        select(func.count(CrawlLog.id)).where(CrawlLog.occurred_at >= last_24h)
    )
    ok_logs = count(
        select(func.sum(case((CrawlLog.success.is_(True), 1), else_=0))).where(
            CrawlLog.occurred_at >= last_24h
        )
    )
    success_rate = (float(ok_logs) / total_logs) if total_logs else None

    # Site/target counts — useful admin cues
    site_total = count(select(func.count(CrawlSite.id)))
    site_enabled = count(
        select(func.count(CrawlSite.id)).where(CrawlSite.enabled.is_(True))
    )
    target_total = count(select(func.count(CrawlTarget.id)))
    target_enabled = count(
        select(func.count(CrawlTarget.id)).where(CrawlTarget.enabled.is_(True))
    )

    return {
        "total_articles": total_articles,
        "ready_articles": ready_articles,
        "unexported_to_rag": unexported,
        "articles_24h": articles_24h,
        "fetch_attempts_24h": total_logs,
        "success_rate_24h": success_rate,
        "sites": {"total": site_total, "enabled": site_enabled},
        "targets": {"total": target_total, "enabled": target_enabled},
        "now": now.isoformat(),
    }


# ---------------------------------------------------------------------------
# Per-site / per-target stats — powers the 采集目标统计 drill-down page.
# Two-level hierarchy: site → target. (We don't aggregate by dept_path here;
# the picker has site + dept filters already, and dept_path is best surfaced
# on the 部门绑定对账 page.)
# ---------------------------------------------------------------------------
@router.get("/api/stats/by-site")
def stats_by_site(s: Session = Depends(_session)) -> dict[str, Any]:
    """Site-level rollup: target count, article count, ready count,
    last_crawled_at (latest across the site's targets), 24h success rate.

    Each metric runs as its own GROUP BY query and is merged in Python
    keyed by site_id; sites absent from a result set default to 0 (or
    ``None`` for timestamps / rates).  Sites are returned sorted by
    descending article_count.
    """
    now = datetime.utcnow()
    last_24h = now - timedelta(hours=24)

    # target_count + last_crawled_at per site
    target_agg = dict(
        s.execute(
            select(
                CrawlTarget.site_id,
                func.count(CrawlTarget.id),
            ).group_by(CrawlTarget.site_id)
        ).all()
    )
    target_enabled_agg = dict(
        s.execute(
            select(
                CrawlTarget.site_id,
                func.count(CrawlTarget.id),
            ).where(CrawlTarget.enabled.is_(True))
            .group_by(CrawlTarget.site_id)
        ).all()
    )
    last_crawled_agg = dict(
        s.execute(
            select(
                CrawlTarget.site_id,
                func.max(CrawlTarget.last_crawled_at),
            ).group_by(CrawlTarget.site_id)
        ).all()
    )
    # article_count + ready_count per site
    article_agg = dict(
        s.execute(
            select(Article.site_id, func.count(Article.id))
            .group_by(Article.site_id)
        ).all()
    )
    ready_agg = dict(
        s.execute(
            select(Article.site_id, func.count(Article.id))
            .where(Article.status == "ready")
            .group_by(Article.site_id)
        ).all()
    )
    articles_24h_agg = dict(
        s.execute(
            select(Article.site_id, func.count(Article.id))
            .where(Article.fetched_at >= last_24h)
            .group_by(Article.site_id)
        ).all()
    )
    # 24h success rate per site
    log_total_agg = dict(
        s.execute(
            select(CrawlLog.site_id, func.count(CrawlLog.id))
            .where(CrawlLog.occurred_at >= last_24h)
            .group_by(CrawlLog.site_id)
        ).all()
    )
    log_ok_agg = dict(
        s.execute(
            select(
                CrawlLog.site_id,
                func.sum(case((CrawlLog.success.is_(True), 1), else_=0)),
            ).where(CrawlLog.occurred_at >= last_24h)
            .group_by(CrawlLog.site_id)
        ).all()
    )

    sites: list[dict[str, Any]] = []
    for site in s.execute(select(CrawlSite).order_by(CrawlSite.site_code)).scalars():
        # Adapter label: explicit cms_adapter wins; otherwise "yaml" when a
        # yaml config exists, else "unset".  (A previous special case for
        # cms_adapter == "gkmlpt" was redundant — any truthy cms_adapter,
        # "gkmlpt" included, already short-circuits the or-chain.)
        adapter_type = site.cms_adapter or ("yaml" if site.yaml_path else "unset")
        total = log_total_agg.get(site.id, 0)
        # SUM(...) yields NULL when the site had no logs — coerce to 0.
        ok = int(log_ok_agg.get(site.id, 0) or 0)
        last = last_crawled_agg.get(site.id)
        sites.append({
            "site_code": site.site_code,
            "site_name": site.site_name,
            "adapter_type": adapter_type,
            "enabled": bool(site.enabled),
            "default_strategy": site.default_fetch_strategy,
            "target_count": int(target_agg.get(site.id, 0)),
            "target_enabled_count": int(target_enabled_agg.get(site.id, 0)),
            "article_count": int(article_agg.get(site.id, 0)),
            "ready_count": int(ready_agg.get(site.id, 0)),
            "articles_24h": int(articles_24h_agg.get(site.id, 0)),
            "last_crawled_at": last.isoformat() if last else None,
            "fetch_attempts_24h": int(total),
            "success_rate_24h": (float(ok) / total) if total else None,
        })
    sites.sort(key=lambda x: x["article_count"], reverse=True)
    return {"count": len(sites), "sites": sites}


@router.get("/api/stats/by-target")
def stats_by_target(
    site: str | None = Query(None, description="filter by crawl_site.site_code"),
    s: Session = Depends(_session),
) -> dict[str, Any]:
    """Per-target rollup. With ?site=... limits to one site for the
    drill-down view; without filter returns ALL targets (use sparingly,
    fine for ~100 targets but a couple thousand will get heavy)."""
    now = datetime.utcnow()
    window_start = now - timedelta(hours=24)

    def rollup(stmt) -> dict:
        # Collapse a two-column (target_id, aggregate) result into a lookup dict.
        return dict(s.execute(stmt).all())

    def iso(dt):
        # Render an optional datetime as ISO-8601; None stays None.
        return dt.isoformat() if dt else None

    articles_total = rollup(
        select(Article.target_id, func.count(Article.id))
        .where(Article.target_id.isnot(None))
        .group_by(Article.target_id)
    )
    articles_ready = rollup(
        select(Article.target_id, func.count(Article.id))
        .where(Article.target_id.isnot(None), Article.status == "ready")
        .group_by(Article.target_id)
    )
    articles_recent = rollup(
        select(Article.target_id, func.count(Article.id))
        .where(Article.target_id.isnot(None), Article.fetched_at >= window_start)
        .group_by(Article.target_id)
    )
    attempts_recent = rollup(
        select(CrawlLog.target_id, func.count(CrawlLog.id))
        .where(CrawlLog.target_id.isnot(None), CrawlLog.occurred_at >= window_start)
        .group_by(CrawlLog.target_id)
    )
    successes_recent = rollup(
        select(
            CrawlLog.target_id,
            func.sum(case((CrawlLog.success.is_(True), 1), else_=0)),
        )
        .where(CrawlLog.target_id.isnot(None), CrawlLog.occurred_at >= window_start)
        .group_by(CrawlLog.target_id)
    )

    # Target rows joined with their site, optionally narrowed to one site.
    query = select(CrawlTarget, CrawlSite.site_code, CrawlSite.site_name).join(
        CrawlSite, CrawlSite.id == CrawlTarget.site_id
    )
    if site:
        query = query.where(CrawlSite.site_code == site)
    query = query.order_by(CrawlSite.site_code, CrawlTarget.target_code)

    items: list[dict[str, Any]] = []
    for target, site_code, site_name in s.execute(query).all():
        attempts = attempts_recent.get(target.id, 0)
        # SUM(...) is NULL for targets with no logs — coerce to 0.
        successes = int(successes_recent.get(target.id, 0) or 0)
        items.append({
            "target_code": target.target_code,
            "target_name": target.target_name,
            "site_code": site_code,
            "site_name": site_name,
            "enabled": bool(target.enabled),
            "channel_name": target.channel_name,
            "channel_path": target.channel_path,
            "content_category": target.content_category,
            "expected_cadence_days": target.expected_cadence_days,
            "interval_sec": target.interval_sec,
            "interval_jitter_sec": target.interval_jitter_sec,
            "article_count": int(articles_total.get(target.id, 0)),
            "ready_count": int(articles_ready.get(target.id, 0)),
            "articles_24h": int(articles_recent.get(target.id, 0)),
            "last_crawled_at": iso(target.last_crawled_at),
            "last_article_time": iso(target.last_article_time),
            "created_at": iso(target.created_at),
            "fetch_attempts_24h": int(attempts),
            "success_rate_24h": (float(successes) / attempts) if attempts else None,
        })
    items.sort(key=lambda row: -row["article_count"])
    return {"count": len(items), "targets": items}
