"""Document detail, versions, graph, and file download endpoints.

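Document detail API module.
Provides document metadata lookup, listing of versions that share the same
content, original-file download and preview, related-document updates,
document deletion (including shared-content handling), and the per-document
knowledge-graph subgraph query.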
"""

from __future__ import annotations

import re
from pathlib import Path
from typing import Annotated, Any

from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import FileResponse, Response
from opensearchpy.exceptions import NotFoundError

from app.api.deps import UserContext, get_current_user
from app.api.schemas.document import (
    DocumentDetail,
    DocumentVersion,
    GraphSubData,
    RelatedDocsUpdateRequest,
    RelatedDocsSyncResponse,
    VersionListResponse,
)
from app.config import settings
from app.core.permission import PermissionService
from app.core.related_docs_service import RelatedDocsService
from app.infrastructure.es_client import ESClient
from app.infrastructure.redis_client import RedisClient
from app.utils.logger import get_logger

logger = get_logger(__name__)

router = APIRouter(prefix="/document", tags=["document"])


def _check_acl_access(
    doc_acl_ids: list[str],
    user_acl_tokens: list[str],
) -> bool:
    """Return True if the user has access to a document.

    Access is granted when:
      - The document has no acl_ids (public), OR
      - Any of the user's tokens appears in the document's acl_ids.

    ACL 权限校验：无 acl_ids 视为公开文档，否则要求用户令牌与文档 ACL 有交集。
    """
    if not doc_acl_ids:
        return True
    return bool(set(user_acl_tokens) & set(doc_acl_ids))


# ── Static-path routes (must be defined BEFORE parametric /{doc_id} routes) ──


@router.get("/versions/{content_hash}", response_model=VersionListResponse)
async def list_versions(
    content_hash: str,
    user: Annotated[UserContext, Depends(get_current_user)],
    request: Request,
) -> VersionListResponse:
    """List the documents that share ``content_hash`` and are visible to the user.

    Searches the meta index for completed documents with the given content
    hash, restricted by the ES filter built from the user's resolved
    permissions. At most 1000 hits are requested, newest ``publish_date``
    first; ``total`` in the response is the number of versions returned.
    """
    app_state = request.app.state
    es_client: ESClient = app_state.es_client
    redis_client: RedisClient = app_state.redis_client

    permission = await PermissionService(redis_client=redis_client).resolve(user)

    bool_query = {
        "must": [
            {"term": {"content_hash": content_hash}},
            {"term": {"status": "completed"}},
        ],
        "filter": [permission.build_es_filter()],
    }
    returned_fields = [
        "doc_id", "title", "doc_number", "issuing_org",
        "doc_type", "publish_date", "signer",
    ]
    search_result = await es_client.raw.search(
        index=settings.es_meta_index,
        body={
            "query": {"bool": bool_query},
            "size": 1000,
            "_source": returned_fields,
            "sort": [{"publish_date": {"order": "desc"}}],
        },
    )
    # The client may hand back a plain dict or a response object wrapping one.
    payload = search_result if isinstance(search_result, dict) else search_result.body

    def _to_version(hit: dict[str, Any]) -> DocumentVersion:
        """Map one ES hit onto a DocumentVersion, falling back to ``_id`` for doc_id."""
        fields = hit["_source"]
        return DocumentVersion(
            doc_id=fields.get("doc_id", hit["_id"]),
            title=fields.get("title", ""),
            doc_number=fields.get("doc_number"),
            issuing_org=fields.get("issuing_org"),
            doc_type=fields.get("doc_type"),
            publish_date=fields.get("publish_date"),
            signer=fields.get("signer"),
        )

    versions = [_to_version(hit) for hit in payload.get("hits", {}).get("hits", [])]

    return VersionListResponse(
        content_hash=content_hash,
        versions=versions,
        total=len(versions),
    )


# Maps a file extension (without the leading dot) to its MIME type; used to set
# the Content-Type of a file download. Extensions not listed here are served
# by the download endpoint as "application/octet-stream".
_FILE_TYPE_MIME: dict[str, str] = {
    "pdf": "application/pdf",
    "doc": "application/msword",
    "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "xls": "application/vnd.ms-excel",
    "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "ppt": "application/vnd.ms-powerpoint",
    "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "png": "image/png",
    "jpg": "image/jpeg",
    "jpeg": "image/jpeg",
    "tiff": "image/tiff",
    "bmp": "image/bmp",
    "txt": "text/plain",
    "md": "text/markdown",
    "markdown": "text/markdown",
    "csv": "text/csv",
}


@router.get("/pdf/{doc_id}")
async def download_pdf(
    doc_id: str,
    user: Annotated[UserContext, Depends(get_current_user)],
    request: Request,
) -> FileResponse:
    """Legacy download endpoint; behaves exactly like ``/file/{doc_id}``.

    No HTTP redirect is issued: the request is handed straight to
    :func:`download_file` and its response is returned unchanged.
    """
    file_response = await download_file(doc_id=doc_id, user=user, request=request)
    return file_response


@router.get("/file/{doc_id}")
async def download_file(
    doc_id: str,
    user: Annotated[UserContext, Depends(get_current_user)],
    request: Request,
) -> FileResponse:
    """Download the original file for a document.

    The document's metadata is read from the meta index, the caller's ACL
    tokens are checked against the document's ``acl_ids``, and the file stored
    as ``<content_hash>.<file_type>`` under ``settings.file_storage_path`` is
    returned. When that file is missing, ``<content_hash>.pdf`` is tried as a
    fallback for legacy data.

    Args:
        doc_id: ID of the document in the meta index.
        user: Authenticated caller, injected by ``get_current_user``.
        request: Incoming request; gives access to the shared ES/Redis clients.

    Returns:
        A ``FileResponse`` streaming the stored file, named after the
        document title.

    Raises:
        HTTPException: 404 if the document, its content hash, or the file on
            disk is missing; 403 if the ACL check fails; 400 if the stored
            content hash or file type is not a safe file-name component;
            500 if the metadata lookup fails for any other reason.
    """
    es_client: ESClient = request.app.state.es_client
    redis_client: RedisClient = request.app.state.redis_client

    # Fetch document metadata. NotFoundError (no such document) is kept apart
    # from other failures (ES outage) so that outages are not reported as 404.
    try:
        response = await es_client.raw.get(
            index=settings.es_meta_index,
            id=doc_id,
        )
    except NotFoundError:
        raise HTTPException(status_code=404, detail=f"Document {doc_id} not found") from None
    except Exception as e:
        logger.error("es_get_document_error", doc_id=doc_id, error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error") from e

    raw = response if isinstance(response, dict) else response.body
    source = raw.get("_source", {})

    # Check permission
    perm_service = PermissionService(redis_client=redis_client)
    perm = await perm_service.resolve(user)

    doc_acl_ids = source.get("acl_ids", [])
    if not _check_acl_access(doc_acl_ids, perm.acl_tokens):
        raise HTTPException(status_code=403, detail="No permission to download this document")

    # Locate file
    content_hash = source.get("content_hash", "")
    # ``or`` also covers an explicit null, which would otherwise yield "<hash>.None".
    file_type = source.get("file_type") or "pdf"
    if not content_hash:
        raise HTTPException(status_code=404, detail="File not available")

    # Both values become part of a filesystem path, so reject anything that
    # could escape the storage directory (e.g. "../" or path separators).
    if not isinstance(content_hash, str) or not re.fullmatch(r'[a-f0-9]+', content_hash):
        raise HTTPException(status_code=400, detail="Invalid content hash format")
    if not isinstance(file_type, str) or not re.fullmatch(
        r'[A-Za-z0-9]+(?:\.[A-Za-z0-9]+)*', file_type
    ):
        raise HTTPException(status_code=400, detail="Invalid file type format")

    file_path = settings.file_storage_path / f"{content_hash}.{file_type}"
    if not file_path.exists():
        # Fallback: try PDF (legacy data)
        file_path = settings.file_storage_path / f"{content_hash}.pdf"
        if not file_path.exists():
            raise HTTPException(status_code=404, detail="File not found on disk")
        file_type = "pdf"

    # A missing, null or empty title falls back to the document id so that
    # the download always gets a usable name (and re.sub never sees None).
    title = str(source.get("title") or doc_id)
    # Replace control characters and path separators to prevent file-name injection.
    title = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', title)
    media_type = _FILE_TYPE_MIME.get(file_type, "application/octet-stream")
    filename = f"{title}.{file_type}"

    return FileResponse(
        path=str(file_path),
        media_type=media_type,
        filename=filename,
    )


@router.get("/preview/{doc_id}")
async def preview_document(
    doc_id: str,
    request: Request,
) -> Response:
    """Preview a document as HTML, or serve it directly when it is a PDF.

    PDFs are returned straight from disk. Every other format is converted by
    proxying to the converter service's ``/api/preview`` endpoint; in HTML
    responses, references to ``<content_hash>_files/`` are rewritten to this
    API's ``preview-res`` proxy route.

    No JWT authentication is required: by design the preview content is
    considered non-sensitive and document IDs are treated as non-enumerable.

    NOTE(review): unlike ``download_file`` this endpoint performs no ACL check,
    so anyone who knows a ``doc_id`` receives the document content — confirm
    that this is intended for ACL-restricted documents.

    Args:
        doc_id: ID of the document in the meta index.
        request: Incoming request; gives access to the shared ES client.

    Returns:
        A ``FileResponse`` for PDFs, otherwise a ``Response`` carrying the
        converter's output.

    Raises:
        HTTPException: 404 if the document, its content hash, or the file on
            disk is missing; 400 if the stored content hash or file type is
            not a safe file-name component; 500 if the metadata lookup fails;
            the converter's own status code if it answers with a non-200;
            502 if the converter cannot be reached or the proxying fails.
    """
    es_client: ESClient = request.app.state.es_client

    # Fetch document metadata
    try:
        response = await es_client.raw.get(
            index=settings.es_meta_index,
            id=doc_id,
        )
    except NotFoundError:
        raise HTTPException(status_code=404, detail=f"Document {doc_id} not found") from None
    except Exception as e:
        logger.error("es_get_document_error", doc_id=doc_id, error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error") from e

    raw = response if isinstance(response, dict) else response.body
    source = raw.get("_source", {})

    # Locate file
    content_hash = source.get("content_hash", "")
    # ``or`` also covers an explicit null, which would otherwise yield "<hash>.None".
    file_type = source.get("file_type") or "pdf"
    if not content_hash:
        raise HTTPException(status_code=404, detail="File not available for preview")

    # Both values end up in filesystem paths (local and inside the converter
    # container), so reject anything that could escape the storage directory.
    if not isinstance(content_hash, str) or not re.fullmatch(r'[a-f0-9]+', content_hash):
        raise HTTPException(status_code=400, detail="Invalid content hash format")
    if not isinstance(file_type, str) or not re.fullmatch(
        r'[A-Za-z0-9]+(?:\.[A-Za-z0-9]+)*', file_type
    ):
        raise HTTPException(status_code=400, detail="Invalid file type format")

    file_path = settings.file_storage_path / f"{content_hash}.{file_type}"
    if not file_path.exists():
        # Fallback: try PDF (legacy data)
        file_path = settings.file_storage_path / f"{content_hash}.pdf"
        if not file_path.exists():
            raise HTTPException(status_code=404, detail="File not found on disk")
        file_type = "pdf"

    # For PDF: serve directly without going through converter
    if file_type == "pdf":
        return FileResponse(
            path=str(file_path),
            media_type="application/pdf",
        )

    # For other formats: proxy to Java converter preview endpoint
    # Converter runs in Docker with /data/files volume mount;
    # map local file_storage_path to /data/files for the Docker container path.
    docker_file_path = f"/data/files/{content_hash}.{file_type}"
    preview_url = f"{settings.converter_base_url}/api/preview"
    params = {
        "filePath": docker_file_path,
        "ext": file_type,
        "hash": content_hash,
    }

    try:
        import httpx
        async with httpx.AsyncClient(timeout=settings.converter_timeout, proxy=None) as client:
            resp = await client.get(preview_url, params=params)

        if resp.status_code != 200:
            logger.warning(
                "preview_converter_error",
                doc_id=doc_id,
                status=resp.status_code,
                body=resp.text[:200],
            )
            raise HTTPException(
                status_code=resp.status_code,
                detail=f"Preview conversion failed: {resp.text[:200]}",
            )

        content_type = resp.headers.get("content-type", "text/html")
        html_body = resp.content

        # Rewrite sub-resource paths:
        # {hash}_files/xxx → /api/ai/v1/document/preview-res/{hash}/xxx
        if "text/html" in content_type:
            html_text = html_body.decode("utf-8", errors="replace")
            # Replace relative references like "{hash}_files/" with proxy path
            html_text = html_text.replace(
                f'{content_hash}_files/',
                f'/api/ai/v1/document/preview-res/{content_hash}/',
            )
            html_body = html_text.encode("utf-8")

        return Response(
            content=html_body,
            media_type=content_type,
        )
    except HTTPException:
        # Keep the converter's status code instead of masking it as a 502 below.
        raise
    except Exception as e:
        logger.error("preview_request_error", doc_id=doc_id, error=str(e))
        raise HTTPException(
            status_code=502,
            detail="Preview service unavailable",
        ) from e


@router.get("/preview-res/{content_hash}/{filename:path}")
async def preview_resource(
    content_hash: str,
    filename: str,
) -> Response:
    """Proxy a preview sub-resource (image, CSS, HTML tab) from the converter.

    Converted HTML (e.g. for Excel files) references sub-resources such as
    images, style sheets and sub-pages; they are fetched from the converter's
    ``/api/preview/resource/<content_hash>/<filename>`` endpoint and passed
    through with the upstream content type.

    No authentication is required: by design the resources are considered
    non-sensitive and the hash in the path is treated as unguessable.

    Args:
        content_hash: Lowercase hex content hash identifying the preview.
        filename: Resource path relative to the preview; may contain ``/``.

    Returns:
        The upstream body with the upstream ``content-type`` (defaulting to
        ``application/octet-stream``).

    Raises:
        HTTPException: 400 for a malformed hash or an unsafe filename; the
            upstream status code when the converter answers with a non-200;
            502 when the converter cannot be reached.
    """
    from urllib.parse import quote

    if not re.fullmatch(r'[a-f0-9]+', content_hash):
        raise HTTPException(status_code=400, detail="Invalid content hash")
    # Prevent path traversal
    if '..' in filename or filename.startswith('/'):
        raise HTTPException(status_code=400, detail="Invalid filename")

    # ``filename`` arrives already URL-decoded. Percent-encode it again so that
    # characters such as "?", "#" or "%" stay part of the path instead of
    # changing the query/fragment of the upstream request.
    resource_url = (
        f"{settings.converter_base_url}/api/preview/resource/"
        f"{content_hash}/{quote(filename, safe='/')}"
    )
    try:
        import httpx
        async with httpx.AsyncClient(timeout=30.0, proxy=None) as client:
            resp = await client.get(resource_url)
        if resp.status_code != 200:
            raise HTTPException(status_code=resp.status_code, detail="Resource not found")
        content_type = resp.headers.get("content-type", "application/octet-stream")
        return Response(content=resp.content, media_type=content_type)
    except HTTPException:
        # Pass the upstream status through rather than masking it as a 502.
        raise
    except Exception as e:
        logger.error("preview_resource_error", hash=content_hash, filename=filename, error=str(e))
        raise HTTPException(status_code=502, detail="Resource unavailable") from e


# ── Parametric routes ────────────────────────────────────────────────────────


@router.get("/{doc_id}", response_model=DocumentDetail)
async def get_document(
    doc_id: str,
    user: Annotated[UserContext, Depends(get_current_user)],
    request: Request,
) -> DocumentDetail:
    """Return the full metadata record of one document.

    The record is read from the meta index and only returned when the
    caller's ACL tokens grant access to the document's ``acl_ids``.
    Responds with 404 for an unknown document, 500 when the lookup fails for
    another reason, and 403 when the ACL check fails.
    """
    app_state = request.app.state
    es_client: ESClient = app_state.es_client
    redis_client: RedisClient = app_state.redis_client

    # Keep "document does not exist" apart from other ES failures so that an
    # outage is not reported as 404.
    try:
        es_response = await es_client.raw.get(index=settings.es_meta_index, id=doc_id)
    except NotFoundError:
        raise HTTPException(status_code=404, detail=f"Document {doc_id} not found")
    except Exception as e:
        logger.error("es_get_document_error", doc_id=doc_id, error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error")

    payload = es_response if isinstance(es_response, dict) else es_response.body
    meta = payload.get("_source", {})

    # ACL check against the caller's resolved tokens.
    permission = await PermissionService(redis_client=redis_client).resolve(user)
    if not _check_acl_access(meta.get("acl_ids", []), permission.acl_tokens):
        raise HTTPException(status_code=403, detail="No permission to view this document")

    # Fields copied as-is; a field absent from the record becomes None.
    passthrough_fields = (
        "doc_number",
        "issuing_org",
        "doc_type",
        "knowledge_category",
        "signer",
        "publish_date",
        "summary",
        "chunk_count",
        "page_count",
        "file_path",
        "file_type",
        "source_system",
        "source_article_id",
        "source_attachment_id",
        "source_site_code",
        "source_target_code",
        "source_url",
        "created_at",
        "updated_at",
    )
    detail_fields: dict[str, Any] = {name: meta.get(name) for name in passthrough_fields}
    # Fields with a non-None fallback when absent from the record.
    detail_fields.update(
        doc_id=meta.get("doc_id", doc_id),
        content_hash=meta.get("content_hash", ""),
        title=meta.get("title", ""),
        subject_words=meta.get("subject_words", []),
        source_metadata=meta.get("source_metadata", {}),
        related_docs=meta.get("related_docs", []),
    )
    return DocumentDetail(**detail_fields)


@router.put("/{doc_id}/related-docs", response_model=RelatedDocsSyncResponse)
async def update_related_docs(
    doc_id: str,
    body: RelatedDocsUpdateRequest,
    user: Annotated[UserContext, Depends(get_current_user)],
    request: Request,
) -> RelatedDocsSyncResponse:
    """Declarative full-replacement update of a document's related documents.

    The request body carries the final, complete related-document list for
    this document. The actual reconciliation is delegated to
    ``RelatedDocsService.sync``, which (per the original design note) works
    out additions, removals and changes and maintains the bidirectional
    relations.

    Args:
        doc_id: ID of the document whose related list is replaced.
        body: The complete new related-document list.
        user: Authenticated caller, injected by ``get_current_user``.
        request: Incoming request; gives access to the shared ES/Redis clients.

    Returns:
        The sync result reported by ``RelatedDocsService.sync``.

    Raises:
        HTTPException: 404 if the document does not exist; 403 if the ACL
            check fails; 500 if the metadata lookup fails for another reason.
    """
    es_client: ESClient = request.app.state.es_client
    redis_client: RedisClient = request.app.state.redis_client

    # Verify document exists and user has permission. As in the other
    # endpoints of this router, ES failures other than "not found" are logged
    # and reported as 500.
    try:
        response = await es_client.raw.get(index=settings.es_meta_index, id=doc_id)
    except NotFoundError:
        raise HTTPException(status_code=404, detail=f"Document {doc_id} not found") from None
    except Exception as e:
        logger.error("es_get_document_error", doc_id=doc_id, error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error") from e

    raw = response if isinstance(response, dict) else response.body
    source = raw.get("_source", {})

    perm_service = PermissionService(redis_client=redis_client)
    perm = await perm_service.resolve(user)
    if not _check_acl_access(source.get("acl_ids", []), perm.acl_tokens):
        raise HTTPException(status_code=403, detail="No permission")

    # ``or`` also covers an explicit null title stored in ES.
    current_title = source.get("title") or ""

    # Delegate to unified service
    service = RelatedDocsService(es_client)
    result = await service.sync(
        doc_id=doc_id,
        new_related=[r.model_dump() for r in body.related_docs],
        current_title=current_title,
        known_old_related=None,  # Normal flow: read from ES
    )

    return RelatedDocsSyncResponse(**result)


@router.delete("/{doc_id}")
async def delete_document(
    doc_id: str,
    user: Annotated[UserContext, Depends(get_current_user)],
    request: Request,
) -> dict[str, Any]:
    """Delete a document, handling shared content correctly.

    The deletion itself is delegated to ``ESClient.delete_document``. Per the
    original design note, when other documents share the same content_hash
    only the meta record is removed and the chunks' ACL is recomputed; when
    this is the last reference the chunks are deleted as well. In the latter
    case (``deleted_chunks > 0``) the stored file is removed from disk too.
    Graph data in Neo4j is cleaned up on a best-effort basis.

    Args:
        doc_id: ID of the document to delete.
        user: Authenticated caller, injected by ``get_current_user``.
        request: Incoming request; gives access to the ES/Redis/Neo4j clients.

    Returns:
        The result dict from ``ESClient.delete_document`` extended with
        ``doc_id``, ``deleted_graph_nodes`` and, when file cleanup was
        attempted, ``file_deleted``.

    Raises:
        HTTPException: 404 if the document does not exist; 403 if the ACL
            check fails; 500 if the metadata lookup fails for another reason.
    """
    es_client: ESClient = request.app.state.es_client
    redis_client: RedisClient = request.app.state.redis_client
    neo4j_client = request.app.state.neo4j_client

    # Verify the document exists and check permission. NotFoundError is kept
    # apart from other failures so that an ES outage is not reported as 404.
    try:
        response = await es_client.raw.get(
            index=settings.es_meta_index,
            id=doc_id,
        )
    except NotFoundError:
        raise HTTPException(status_code=404, detail=f"Document {doc_id} not found") from None
    except Exception as e:
        logger.error("es_get_document_error", doc_id=doc_id, error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error") from e

    raw = response if isinstance(response, dict) else response.body
    source = raw.get("_source", {})

    perm_service = PermissionService(redis_client=redis_client)
    perm = await perm_service.resolve(user)

    doc_acl_ids = source.get("acl_ids", [])
    if not _check_acl_access(doc_acl_ids, perm.acl_tokens):
        raise HTTPException(status_code=403, detail="No permission to delete this document")

    # Delegate to ESClient which handles shared content correctly
    result = await es_client.delete_document(doc_id)

    # Clean up stored file if chunks were deleted (last reference)
    content_hash = source.get("content_hash", "")
    if content_hash and result.get("deleted_chunks", 0) > 0:
        # ``or`` also covers an explicit null, which would otherwise yield "<hash>.None".
        file_type = source.get("file_type") or "pdf"
        # Same validation as the download endpoint: these values become part of
        # a path that is about to be unlinked, so never let them point outside
        # the storage directory.
        hash_is_safe = isinstance(content_hash, str) and bool(
            re.fullmatch(r'[a-f0-9]+', content_hash)
        )
        type_is_safe = isinstance(file_type, str) and bool(
            re.fullmatch(r'[A-Za-z0-9]+(?:\.[A-Za-z0-9]+)*', file_type)
        )
        if not (hash_is_safe and type_is_safe):
            logger.warning(
                "delete_file_skipped_invalid_name",
                doc_id=doc_id,
                content_hash=str(content_hash),
                file_type=str(file_type),
            )
            result["file_deleted"] = False
        else:
            file_path = settings.file_storage_path / f"{content_hash}.{file_type}"
            try:
                file_path.unlink(missing_ok=True)
                # Also try cleaning up legacy PDF path
                if file_type != "pdf":
                    legacy_pdf = settings.file_storage_path / f"{content_hash}.pdf"
                    legacy_pdf.unlink(missing_ok=True)
                result["file_deleted"] = True
            except OSError as exc:
                # Best-effort: the document is already gone from ES, so report
                # the leftover file instead of failing the request.
                logger.warning("delete_file_error", doc_id=doc_id, error=str(exc))
                result["file_deleted"] = False

    # Clean up Neo4j graph data (best-effort)
    deleted_graph_nodes = 0
    try:
        neo4j_result = await neo4j_client.delete_document_graph(doc_id)
        deleted_graph_nodes = neo4j_result.get("deleted_nodes", 0)
    except Exception as exc:
        logger.warning("delete_neo4j_error", doc_id=doc_id, error=str(exc))

    result["deleted_graph_nodes"] = deleted_graph_nodes
    logger.info("document_deleted", doc_id=doc_id, **result)
    return {"doc_id": doc_id, **result}


@router.get("/{doc_id}/graph", response_model=GraphSubData)
async def get_document_graph(
    doc_id: str,
    user: Annotated[UserContext, Depends(get_current_user)],
    request: Request,
) -> GraphSubData:
    """Return the knowledge-graph neighbourhood for a document.

    After the same existence and ACL checks as the detail endpoint, Neo4j is
    queried for the document's graph (passing the caller's ACL tokens) and
    layout positions are pre-computed server-side. A failing graph query is
    logged and answered with an empty graph rather than an error.

    Args:
        doc_id: ID of the document in the meta index.
        user: Authenticated caller, injected by ``get_current_user``.
        request: Incoming request; gives access to the ES/Redis/Neo4j clients.

    Returns:
        The nodes and edges of the document's graph; both lists are empty
        when the graph query fails.

    Raises:
        HTTPException: 404 if the document does not exist; 403 if the ACL
            check fails; 500 if the metadata lookup fails for another reason.
    """
    from app.infrastructure.neo4j_client import Neo4jClient

    es_client: ESClient = request.app.state.es_client
    redis_client: RedisClient = request.app.state.redis_client
    neo4j_client: Neo4jClient = request.app.state.neo4j_client

    # Keep "document does not exist" apart from other ES failures, as the
    # other endpoints in this router do, so that an outage is not reported
    # as 404.
    try:
        response = await es_client.raw.get(
            index=settings.es_meta_index,
            id=doc_id,
        )
    except NotFoundError:
        raise HTTPException(status_code=404, detail=f"Document {doc_id} not found") from None
    except Exception as e:
        logger.error("es_get_document_error", doc_id=doc_id, error=str(e))
        raise HTTPException(status_code=500, detail="Internal server error") from e

    raw = response if isinstance(response, dict) else response.body
    source = raw.get("_source", {})

    perm_service = PermissionService(redis_client=redis_client)
    perm = await perm_service.resolve(user)
    doc_acl_ids = source.get("acl_ids", [])
    if not _check_acl_access(doc_acl_ids, perm.acl_tokens):
        raise HTTPException(status_code=403, detail="No permission to view this document")

    try:
        graph_data = await neo4j_client.query_document_graph(
            doc_id,
            acl_tokens=perm.acl_tokens,
        )
        nodes = graph_data.get("nodes", [])
        edges = graph_data.get("edges", [])
        # Pre-compute layout positions server-side
        from app.core.graph_query_service import GraphQueryService
        GraphQueryService._compute_layout_positions(nodes, edges)
        return GraphSubData(
            nodes=nodes,
            edges=edges,
        )
    except Exception as e:
        # Best-effort: a graph failure degrades to an empty graph.
        logger.warning("graph_query_error", doc_id=doc_id, error=str(e))
        return GraphSubData(nodes=[], edges=[])