"""
摘要生成器 —— 基于 LLM 为政务文档生成简明中文摘要，用于搜索结果展示和文档概览。
LLM-based summary generator for government documents.

Usage
-----
    generator = SummaryGenerator(llm_client)
    summary = await generator.generate(full_text, metadata)
    # summary = "本文件由××发布，主要内容为……"
"""

from __future__ import annotations

from typing import Any

from app.config import settings
from app.infrastructure.llm_client import LLMClient
from app.prompts.summary_generation import SYSTEM_PROMPT, USER_PROMPT
from app.utils.logger import get_logger

logger = get_logger(__name__)


class SummaryGenerator:
    """Generate a concise Chinese summary (150-250 chars) for a government
    document using an LLM; on any failure return an empty string so ingest
    is never blocked.

    The generator calls ``LLMClient.complete`` which returns plain text — no
    JSON parsing required.  All failures are handled gracefully: ``generate``
    never raises; it returns an empty string on any error so the ingest
    pipeline can continue without a summary.
    """

    def __init__(
        self,
        llm_client: LLMClient,
        *,
        max_content_chars: int | None = None,
    ) -> None:
        """Store the LLM client and resolve the content-truncation limit.

        Parameters
        ----------
        llm_client:
            Client used to issue the completion request.
        max_content_chars:
            Optional override for how many characters of the document text
            are forwarded to the LLM; falls back to
            ``settings.summary_max_content_chars`` when not provided.
        """
        self._llm = llm_client
        # Use an explicit ``is None`` sentinel check: the old ``or`` fallback
        # silently discarded a caller-supplied falsy value such as 0.
        self._max_chars = (
            max_content_chars
            if max_content_chars is not None
            else settings.summary_max_content_chars
        )

    async def generate(
        self,
        text: str,
        metadata: dict[str, Any] | None = None,
    ) -> str:
        """Generate a summary for the given document text.

        Parameters
        ----------
        text:
            Full document text extracted from the PDF.  Only the first
            ``max_content_chars`` characters are forwarded to the LLM.
        metadata:
            Optional metadata dict containing ``title``, ``doc_type``,
            ``issuing_org`` etc. – used to make the prompt more specific.

        Returns
        -------
        str
            A 150-250 character Chinese summary, or an empty string if
            generation fails.
        """
        content = text[: self._max_chars].strip()
        if not content:
            logger.warning("summary_generation_empty_text")
            return ""

        meta = metadata or {}
        # safe_substitute avoids KeyError/ValueError when the document body
        # itself contains template-like ``${...}`` / ``$`` sequences.
        prompt = USER_PROMPT.safe_substitute(
            title=meta.get("title", "（未知）"),
            doc_type=meta.get("doc_type", "（未知）"),
            issuing_org=meta.get("issuing_org", "（未知）"),
            max_chars=self._max_chars,
            content=content,
        )

        try:
            summary = await self._llm.complete(
                prompt,
                system=SYSTEM_PROMPT,
                temperature=0.3,
                # Use the configured token budget instead of a hardcoded value.
                max_tokens=settings.summary_max_tokens,
                # Disable extended chain-of-thought for speed – a factual
                # summary does not need deep reasoning.
                extra_body={"enable_thinking": False},
            )
            summary = summary.strip()

            logger.info(
                "summary_generated",
                length=len(summary),
                preview=summary[:60],
            )
            return summary

        except Exception as exc:
            # Deliberate best-effort boundary: log and degrade to "" so the
            # ingest pipeline proceeds without a summary.
            logger.warning("summary_generation_failed", error=str(exc))
            return ""
