"""Document ingestion endpoints – trigger and monitor Celery tasks.

文档入库接口模块。
提供手动触发入库、OA 系统 Webhook 接收文档、权限变更通知，
以及入库任务状态查询等功能。所有入库操作均通过 Celery 异步执行。
"""

from __future__ import annotations

from datetime import datetime, timezone
from pathlib import Path
from typing import Annotated, Any
from uuid import uuid4

from fastapi import APIRouter, Depends, Header, HTTPException, Request, UploadFile, File, Form
from pydantic import BaseModel

from app.api.deps import UserContext, get_current_user
from app.api.schemas.ingest import IngestRequest, IngestStatus
from app.config import settings
from app.core.ingest_trace_recorder import IngestTraceRecorder
from app.infrastructure.es_client import ESClient
from app.tasks.ingest_task import ingest_document_task, update_permissions_task
from app.utils.file_type import detect_file_type, is_supported
from app.utils.logger import get_logger

logger = get_logger(__name__)

router = APIRouter(prefix="/ingest", tags=["ingest"])


def _verify_webhook_token(authorization: str | None) -> None:
    """Validate the shared-secret webhook token, if one is configured.

    Accepts the token either bare or as a ``Bearer <token>`` header value.
    When ``settings.rag_ingest_token`` is unset/empty, verification is
    disabled and every request is allowed through.

    Raises:
        HTTPException: 401 with a WWW-Authenticate challenge on mismatch.
    """
    expected = settings.rag_ingest_token
    if not expected:
        # Token auth disabled — nothing to check.
        return
    supplied = (authorization or "").strip()
    prefix = "bearer "
    if supplied.lower().startswith(prefix):
        supplied = supplied[len(prefix):].strip()
    if supplied != expected:
        raise HTTPException(
            status_code=401,
            detail="invalid ingest webhook token",
            headers={"WWW-Authenticate": "Bearer"},
        )


async def _write_pending_meta(
    request: Request,
    doc_id: str,
    file_path: str,
    metadata: dict[str, Any],
    task_id: str,
    file_type: str = "",
) -> None:
    """Write a minimal 'pending' record to the meta index when a task is queued.

    Pre-writes a pending-status metadata document at enqueue time so the
    frontend can see the item before the Celery worker actually processes
    it. This write is best-effort: any failure is logged and swallowed so
    it never blocks the ingest request itself.
    """
    try:
        es_client: ESClient = request.app.state.es_client
        now_iso = datetime.now(timezone.utc).isoformat()
        meta = metadata or {}

        record: dict[str, Any] = {"doc_id": doc_id}
        # Caller-supplied metadata fields, copied with a per-field default
        # when absent. Order matches the historical document layout.
        for field, default in (
            ("title", ""),
            ("original_filename", ""),
            ("doc_number", ""),
            ("issuing_org", ""),
            ("doc_type", ""),
            ("document_scene_type", ""),
            ("subject_words", []),
            ("signer", ""),
            ("publish_date", None),
            ("acl_ids", []),
            ("knowledge_category", ""),
            ("knowledge_category_code", ""),
            ("source_system", ""),
            ("source_article_id", ""),
            ("source_attachment_id", ""),
            ("source_site_code", ""),
            ("source_target_code", ""),
            ("source_url", ""),
            ("source_metadata", {}),
            ("service_guide_status", ""),
            ("guide_profile_id", ""),
            ("guide_matter_name", ""),
        ):
            record[field] = meta.get(field, default)
        # Normalize falsy publish_date (e.g. empty string) to None.
        record["publish_date"] = record["publish_date"] or None

        record.update(
            file_path=file_path,
            file_type=file_type,
            status="pending",
            task_id=task_id,
            created_at=now_iso,
            updated_at=now_iso,
        )

        await es_client.raw.index(
            index=settings.es_meta_index,
            id=doc_id,
            body=record,
        )
        logger.info("pending_meta_written", doc_id=doc_id, task_id=task_id)
    except Exception as exc:
        logger.warning("pending_meta_write_failed", doc_id=doc_id, error=str(exc))

def _make_trace_recorder(
    request: Request,
    doc_id: str,
    task_id: str,
) -> IngestTraceRecorder:
    """Build an ingest trace recorder for writing trace events at the API layer.

    The Celery task id doubles as the trace id, so API-side and worker-side
    events land in the same trace record.
    """
    return IngestTraceRecorder(
        request.app.state.es_client,
        trace_id=task_id,
        doc_id=doc_id,
        task_id=task_id,
    )


async def _write_initial_trace(
    recorder: IngestTraceRecorder,
    doc_id: str,
    metadata: dict[str, Any],
    file_type: str,
    source_type: str,
    *,
    file_size: int = 0,
) -> None:
    """Create the trace record and write the upload_received event.

    Best-effort: failures are logged and swallowed so tracing never blocks
    the actual ingest request.
    """
    try:
        meta = metadata or {}
        await recorder.start_trace(
            source_type=source_type,
            file_type=file_type,
            original_filename=meta.get("original_filename", ""),
            title=meta.get("title", ""),
            operator=meta.get("operator", ""),
        )

        summary = f"收到{source_type}上传"
        if file_size:
            # Append a human-readable size only when one is known.
            summary += f"，文件大小 {file_size / (1024*1024):.1f} MB"

        event_details = {
            "source_type": source_type,
            "file_type": file_type,
            "file_size": file_size,
            "original_filename": meta.get("original_filename", ""),
            "doc_id": doc_id,
            "title": meta.get("title", ""),
            "doc_number": meta.get("doc_number", ""),
            "issuing_org": meta.get("issuing_org", ""),
            "doc_type": meta.get("doc_type", ""),
            "acl_ids": meta.get("acl_ids", []),
            "knowledge_category": meta.get("knowledge_category", ""),
        }
        await recorder.record(
            "upload_received", "completed",
            summary=summary,
            details=event_details,
            service="api",
        )
    except Exception as exc:
        logger.warning("trace_initial_write_failed", doc_id=doc_id, error=str(exc))


async def _record_task_queued(
    recorder: IngestTraceRecorder,
    doc_id: str,
    task_id: str,
    *,
    queue: str = "default",
) -> None:
    """Record that the Celery task was queued successfully.

    Best-effort: trace-write failures are logged and swallowed.
    """
    try:
        await recorder.record(
            "task_queued",
            "completed",
            summary=f"Celery 任务已入队: {task_id[:8]}...",
            details={"task_id": task_id, "queue": queue},
            service="api",
        )
    except Exception as exc:
        logger.warning("trace_task_queued_write_failed", doc_id=doc_id, error=str(exc))


async def _record_task_queue_failed(
    recorder: IngestTraceRecorder,
    doc_id: str,
    task_id: str,
    error: Exception,
    *,
    queue: str = "default",
) -> None:
    """Record a queue failure without leaving a false-success trace.

    Writes a failed ``task_queued`` event and then finalizes the whole
    trace as failed. Best-effort: trace-write failures are logged and
    swallowed.
    """
    try:
        message = str(error)
        await recorder.record(
            "task_queued",
            "failed",
            summary=f"Celery 任务入队失败: {message[:100]}",
            details={"task_id": task_id, "queue": queue},
            service="api",
            error_code="INGEST_TASK_QUEUE_FAILED",
            error_message=message,
        )
        await recorder.finish_trace(
            "failed",
            error_code="INGEST_TASK_QUEUE_FAILED",
            error_message=message,
        )
    except Exception as exc:
        logger.warning("trace_task_queue_failed_write_failed", doc_id=doc_id, error=str(exc))


@router.post("/trigger", response_model=IngestStatus)
async def trigger_ingest(
    request: Request,
    body: IngestRequest,
    user: Annotated[UserContext, Depends(get_current_user)],
) -> IngestStatus:
    """Queue a document for ingestion.

    手动触发文档入库。校验文件存在性和类型后，将任务发送至 Celery 队列。
    """
    raw_path = body.file_path
    if not raw_path:
        raise HTTPException(status_code=400, detail="file_path is required")

    file_path = Path(raw_path)
    if not file_path.is_absolute():
        file_path = settings.file_storage_path / file_path

    # 安全修复：解析符号链接后校验路径是否在存储目录内，防止路径穿越攻击
    file_path = file_path.resolve()
    if not file_path.is_relative_to(Path(settings.file_storage_path).resolve()):
        raise HTTPException(
            status_code=400,
            detail="Invalid file path: must be within the storage directory",
        )

    if not file_path.exists():
        raise HTTPException(
            status_code=400,
            detail=f"File not found: {raw_path}",
        )

    # Detect and validate file type
    file_type = detect_file_type(file_path)
    if not is_supported(file_type):
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file type: {file_type}. Supported: {', '.join(settings.supported_file_types)}",
        )

    filename = file_path.name

    # Pre-generate Celery task ID so we can write trace BEFORE queuing,
    # eliminating the race where the worker starts before the trace exists.
    task_id = str(uuid4())
    recorder = _make_trace_recorder(request, body.doc_id, task_id)

    await _write_initial_trace(
        recorder, body.doc_id, body.metadata, file_type,
        source_type="manual",
    )

    try:
        ingest_document_task.apply_async(
            kwargs={"doc_id": body.doc_id, "file_path": filename, "metadata": body.metadata},
            task_id=task_id,
        )
    except Exception as exc:
        await _record_task_queue_failed(recorder, body.doc_id, task_id, exc)
        # 安全修复：不向客户端暴露内部异常详情，仅记录日志
        logger.exception("ingest_task_queue_failed", doc_id=body.doc_id)
        raise HTTPException(status_code=503, detail="操作失败，请稍后重试") from exc

    await _write_pending_meta(request, body.doc_id, filename, body.metadata, task_id, file_type)
    await _record_task_queued(recorder, body.doc_id, task_id)
    logger.info("ingest_triggered", doc_id=body.doc_id, task_id=task_id, file_type=file_type)

    return IngestStatus(
        task_id=task_id,
        status="PENDING",
        progress=0.0,
    )


class WebhookDocumentRequest(BaseModel):
    """Request body from OA webhook for new document.

    Schema for the metadata the OA system pushes with a new document.

    NOTE(review): the /webhook/document route parses its metadata from a
    multipart form JSON string rather than binding this model — confirm
    whether this class is referenced elsewhere before removing it.
    """
    doc_id: str
    title: str = ""
    doc_number: str = ""
    issuing_org: str = ""
    receiving_orgs: list[str] = []
    doc_type: str = ""
    subject_words: list[str] = []
    signer: str = ""
    publish_date: str = ""
    acl_ids: list[str] = []


@router.post("/webhook/document")
async def webhook_receive_document(
    request: Request,
    authorization: Annotated[str | None, Header()] = None,
    metadata: str = Form(...),
    file: UploadFile = File(...),
) -> dict[str, Any]:
    """Receive a document from OA system webhook.

    Expects multipart form data:
    - metadata: JSON string of document metadata (with acl_ids)
    - file: Document file (any supported format)

    Saves the file locally and queues ingestion.
    """
    import json

    _verify_webhook_token(authorization)

    try:
        meta_dict = json.loads(metadata)
    except json.JSONDecodeError as e:
        # 保留异常链，便于调试时追溯原始 JSON 解析错误
        raise HTTPException(status_code=400, detail=f"Invalid metadata JSON: {e}") from e

    doc_id = meta_dict.get("doc_id")
    if not doc_id:
        raise HTTPException(status_code=400, detail="doc_id is required in metadata")

    # Validate file size
    # 注意：await file.read() 会将整个文件读入内存。
    # 生产环境应通过 Nginx/反向代理的 client_max_body_size 限制上传大小，
    # 确保到达此处的文件大小已经可控。
    content = await file.read()
    file_size_mb = len(content) / (1024 * 1024)
    if file_size_mb > settings.max_file_size_mb:
        raise HTTPException(
            status_code=400,
            detail=f"File too large: {file_size_mb:.1f}MB (max: {settings.max_file_size_mb}MB)",
        )

    # Determine file extension from original filename
    original_name = file.filename or "document"
    ext = Path(original_name).suffix.lstrip(".").lower() or "bin"

    # Preserve original filename in metadata for display / title fallback
    meta_dict["original_filename"] = original_name

    # Save file temporarily (pipeline will copy to content-addressed path)
    file_dir = settings.file_storage_path
    file_dir.mkdir(parents=True, exist_ok=True)
    tmp_path = file_dir / f"_tmp_{doc_id}.{ext}"

    try:
        with open(tmp_path, "wb") as f:
            f.write(content)
    except Exception as e:
        # 保留异常链，便于调试时追溯文件写入失败的原因
        raise HTTPException(status_code=500, detail=f"Failed to save file: {e}") from e

    # Detect and validate file type
    file_type = detect_file_type(tmp_path)
    if not is_supported(file_type):
        tmp_path.unlink(missing_ok=True)
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported file type: {file_type}. Supported: {', '.join(settings.supported_file_types)}",
        )

    logger.info(
        "webhook_received",
        doc_id=doc_id,
        file_size=len(content),
        file_type=file_type,
        file_path=str(tmp_path),
    )

    # Pass only the filename so the Celery worker can resolve it
    filename = tmp_path.name

    # Pre-generate task ID so trace is written BEFORE the worker can start.
    task_id = str(uuid4())
    recorder = _make_trace_recorder(request, doc_id, task_id)

    await _write_initial_trace(
        recorder, doc_id, meta_dict, file_type,
        source_type="webhook", file_size=len(content),
    )

    try:
        ingest_document_task.apply_async(
            kwargs={"doc_id": doc_id, "file_path": filename, "metadata": meta_dict},
            task_id=task_id,
        )
    except Exception as exc:
        await _record_task_queue_failed(recorder, doc_id, task_id, exc)
        tmp_path.unlink(missing_ok=True)
        # 安全修复：不向客户端暴露内部异常详情，仅记录日志
        logger.exception("webhook_ingest_task_queue_failed", doc_id=doc_id)
        raise HTTPException(status_code=503, detail="操作失败，请稍后重试") from exc

    await _write_pending_meta(request, doc_id, filename, meta_dict, task_id, file_type)
    await _record_task_queued(recorder, doc_id, task_id)

    return {
        "doc_id": doc_id,
        "task_id": task_id,
        "status": "queued",
        "file_type": file_type,
        "message": "Document received and queued for processing",
    }


class PermissionUpdateRequest(BaseModel):
    """Request body for permission change webhook.

    Schema the OA system uses when pushing permission (ACL) changes.
    """
    doc_id: str
    acl_ids: list[str] = []

@router.post("/webhook/permission")
async def webhook_update_permission(
    request: Request,
    body: PermissionUpdateRequest,
) -> dict[str, Any]:
    """Receive permission update from OA system."""
    task = update_permissions_task.delay(
        doc_id=body.doc_id,
        acl_ids=body.acl_ids,
    )

    logger.info(
        "permission_update_queued",
        doc_id=body.doc_id,
        task_id=task.id,
        acl_count=len(body.acl_ids),
    )

    return {
        "doc_id": body.doc_id,
        "task_id": task.id,
        "status": "queued",
    }


@router.get("/status/{task_id}", response_model=IngestStatus)
async def get_ingest_status(
    task_id: str,
    user: Annotated[UserContext, Depends(get_current_user)],
) -> IngestStatus:
    """Check the status of a running or completed ingest task.

    通过 Celery AsyncResult 查询任务状态，将内部状态映射为前端可理解的枚举值。
    """
    return _read_ingest_status(task_id)


@router.get("/webhook/status/{task_id}", response_model=IngestStatus)
async def get_webhook_ingest_status(
    task_id: str,
    authorization: Annotated[str | None, Header()] = None,
) -> IngestStatus:
    """Check webhook ingest status by task id.

    This endpoint is used by service-to-service exporters that only receive a
    task_id from /webhook/document and need to wait for final completion before
    acknowledging their local export. It requires the webhook token when
    RAG_INGEST_TOKEN is configured.
    """
    _verify_webhook_token(authorization)
    return _read_ingest_status(task_id)


def _read_ingest_status(task_id: str) -> IngestStatus:
    """Map a Celery task state onto the public IngestStatus schema.

    PENDING and PROCESSING report progress from the task's meta dict;
    SUCCESS inspects the task's own result payload ("completed" /
    "partial_failed" / anything else → FAILED); FAILURE surfaces the
    exception text. Unknown states are passed through verbatim with zero
    progress.
    """
    from celery.result import AsyncResult

    result = AsyncResult(task_id, app=ingest_document_task.app)
    state = result.state

    if state == "PENDING":
        return IngestStatus(task_id=task_id, status="PENDING", progress=0.0)

    if state == "PROCESSING":
        meta = result.info or {}
        return IngestStatus(
            task_id=task_id,
            status="PROCESSING",
            progress=meta.get("progress", 0.0),
        )

    if state == "SUCCESS":
        # The worker returns its own status dict even on logical failure.
        payload = result.result or {}
        mapped = {
            "completed": "COMPLETED",
            "partial_failed": "PARTIAL_FAILED",
        }.get(payload.get("status", "failed"), "FAILED")
        return IngestStatus(
            task_id=task_id,
            status=mapped,
            progress=1.0,
            error=payload.get("error"),
            deduplicated=payload.get("deduplicated", False),
            content_hash=payload.get("content_hash"),
        )

    if state == "FAILURE":
        return IngestStatus(
            task_id=task_id,
            status="FAILED",
            progress=0.0,
            error=str(result.result),
        )

    return IngestStatus(task_id=task_id, status=state, progress=0.0)
