"""研究引擎格式化与标准化工具函数。

提供 ES 命中聚合、文档合并去重、上下文构建、LLM 消息组装、
研究计划/报告/章节重跑结果的标准化等纯函数，供 ResearchEngine 和
ResearchRetriever 使用。所有函数无状态依赖。
"""

from __future__ import annotations

from typing import Any

from app.api.schemas.research import ResearchPlan, ResearchTask
from app.prompts.qa_prompts import (
    QA_FOLLOWUP_TEMPLATE,
    QA_SYSTEM_PROMPT,
    QA_USER_TEMPLATE,
)
from app.prompts.research_prompts import (
    DEEP_RESEARCH_REPORT_SYSTEM,
    DEEP_RESEARCH_REPORT_USER,
    RESEARCH_FOLLOWUP_TEMPLATE,
    RESEARCH_SYSTEM_PROMPT,
    RESEARCH_USER_TEMPLATE,
)

# Maximum highlight passages per document in context
_MAX_PASSAGES_PER_DOC = 3
# Maximum characters per passage
_MAX_PASSAGE_CHARS = 600


def _aggregate_es_hits(
    raw: dict[str, Any], top_n: int, preferred_doc_ids: list[str] | None = None,
) -> list[dict[str, Any]]:
    """将 ES chunk 级命中按 doc_id 聚合为文档级结果，保留最高分和高亮片段。

    Aggregate ES chunk hits to document-level results.

    When *preferred_doc_ids* is provided and a chunk belongs to multiple
    documents, the first matching preferred doc_id is chosen instead of
    blindly picking ``doc_ids[0]``.  This is essential for notebook mode
    where the same chunk may be shared across notebooks.
    """
    hits = raw.get("hits", {}).get("hits", [])
    doc_map: dict[str, dict[str, Any]] = {}
    _preferred_set = set(preferred_doc_ids) if preferred_doc_ids else set()

    for hit in hits:
        src = hit.get("_source", {})
        # The chunks index stores doc_ids (an array); the meta index stores doc_id (a string)
        doc_ids = src.get("doc_ids") or []
        doc_id = ""
        if doc_ids and _preferred_set:
            # Pick the first doc_id that is in the preferred set
            for _did in doc_ids:
                if _did in _preferred_set:
                    doc_id = _did
                    break
        if not doc_id:
            doc_id = doc_ids[0] if doc_ids else src.get("doc_id", "")
        if not doc_id:
            continue
        score = hit.get("_score", 0.0)
        hl_content = hit.get("highlight", {}).get("content", [])

        if doc_id not in doc_map:
            doc_map[doc_id] = {
                "doc_id": doc_id,
                "title": src.get("title", ""),
                "doc_number": src.get("doc_number"),
                "issuing_org": src.get("issuing_org"),
                "doc_type": src.get("doc_type"),
                "publish_date": src.get("publish_date"),
                "signer": src.get("signer"),
                "source_url": src.get("source_url"),
                "source_metadata": src.get("source_metadata") or {},
                "source_system": src.get("source_system"),
                "source_site_code": src.get("source_site_code"),
                "source_target_code": src.get("source_target_code"),
                "score": score,
                "passages": [],
                "matched_chunks": [],
            }
        doc = doc_map[doc_id]
        if score and score > (doc["score"] or 0):
            doc["score"] = score
        for hl in hl_content:
            if len(doc["passages"]) < _MAX_PASSAGES_PER_DOC:
                doc["passages"].append(hl)
        # Preserve chunk-level metadata for citation quick preview
        if hl_content and len(doc["matched_chunks"]) < 3:
            doc["matched_chunks"].append({
                "text": hl_content[0],
                "page_number": src.get("page_number"),
                "page_numbers": src.get("page_numbers") or [],
                "heading_hierarchy": src.get("heading_hierarchy") or [],
                "element_type": src.get("element_type") or "",
                "chunk_index": src.get("chunk_index"),
            })

    return sorted(
        doc_map.values(),
        key=lambda d: d.get("score") or 0,
        reverse=True,
    )[:top_n]
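
# A minimal sketch of the aggregation contract, using a hypothetical ES payload:
# two chunk hits for the same document collapse into one entry that carries the
# best score and both highlight passages.
#
#     raw = {"hits": {"hits": [
#         {"_score": 2.0, "_source": {"doc_ids": ["d1"], "title": "A"},
#          "highlight": {"content": ["……<em>税率</em>……"]}},
#         {"_score": 3.5, "_source": {"doc_ids": ["d1"], "title": "A"},
#          "highlight": {"content": ["……<em>调整</em>……"]}},
#     ]}}
#     docs = _aggregate_es_hits(raw, top_n=10)
#     assert docs[0]["score"] == 3.5 and len(docs[0]["passages"]) == 2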


def _merge_unique_docs(*groups: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """将多组文档按 doc_id 去重合并，保持优先级顺序（先出现的组优先）。

    当 explicit_docs 优先保留但缺少 matched_chunks 时，从后续同 doc_id
    的 es_docs 补并 chunk 元数据，确保引用快速预览可用。
    """
    merged: list[dict[str, Any]] = []
    seen: dict[str, dict[str, Any]] = {}
    for group in groups:
        for doc in group:
            doc_id = doc.get("doc_id")
            if not doc_id:
                continue
            if doc_id in seen:
                existing = seen[doc_id]
                if not existing.get("matched_chunks") and doc.get("matched_chunks"):
                    existing["matched_chunks"] = doc["matched_chunks"]
                continue
            seen[doc_id] = doc
            merged.append(doc)
    return merged
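
# Sketch with hypothetical entries: the first group wins the slot, but chunk
# metadata is backfilled from a later duplicate so citation preview still works.
#
#     explicit = [{"doc_id": "d1", "title": "A"}]
#     es_hits = [{"doc_id": "d1", "title": "A", "matched_chunks": [{"text": "…"}]}]
#     merged = _merge_unique_docs(explicit, es_hits)
#     assert merged[0] is explicit[0]
#     assert merged[0]["matched_chunks"] == [{"text": "…"}]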


def _reference_identity(doc: dict[str, Any]) -> str | None:
    """为引用条目生成稳定身份键，允许 Guide 与原文档在同一 doc_id 下共存。"""
    doc_id = doc.get("doc_id")
    if not doc_id:
        return None

    source_group = str(doc.get("_source_type") or doc.get("source_group") or "search")
    if source_group == "guide":
        profile_id = doc.get("profile_id")
        return f"guide:{profile_id or doc_id}"
    return f"doc:{doc_id}"


def _merge_unique_reference_docs(*groups: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """按引用身份去重，普通文档按 doc_id 合并，Guide 额外纳入 profile_id 维度。"""
    merged: list[dict[str, Any]] = []
    seen: set[str] = set()
    for group in groups:
        for doc in group:
            identity = _reference_identity(doc)
            if not identity or identity in seen:
                continue
            seen.add(identity)
            merged.append(doc)
    return merged
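
# Sketch with hypothetical entries: a Guide and the underlying document share
# doc_id "d1" but keep separate reference slots.
#
#     guide = {"doc_id": "d1", "_source_type": "guide", "profile_id": "p9"}
#     doc = {"doc_id": "d1"}
#     assert _reference_identity(guide) == "guide:p9"
#     assert _reference_identity(doc) == "doc:d1"
#     assert len(_merge_unique_reference_docs([guide], [doc])) == 2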


def _build_context_text(docs: list[dict[str, Any]]) -> str:
    """将检索到的文档格式化为编号上下文文本，作为 LLM 的参考材料输入。

    Format retrieved documents into numbered context passages.
    Uses ``_build_context_with_citation_map`` internally and discards the map
    for callers that don't need chunk-level references.
    """
    text, _ = _build_context_with_citation_map(docs)
    return text


def _build_context_with_citation_map(
    docs: list[dict[str, Any]],
) -> tuple[str, list[dict[str, Any]]]:
    """Build numbered context text **and** a citation_map for chunk-level refs.

    Each passage (or document with no passages) gets its own ``[N]`` number.
    The returned ``citation_map`` is a list indexed by ``N-1``, each entry
    containing ``doc_id`` plus chunk-level metadata so the frontend can show
    the exact passage when the user clicks a reference.
    """
    parts: list[str] = []
    citation_map: list[dict[str, Any]] = []  # index = ref_number - 1
    ref_idx = 0

    for doc in docs:
        title = doc.get("title") or "（无标题）"
        doc_id = doc.get("doc_id") or ""
        doc_number = doc.get("doc_number") or ""
        issuing_org = doc.get("issuing_org") or ""
        publish_date = doc.get("publish_date") or ""
        passages = doc.get("passages") or []
        chunks = doc.get("matched_chunks") or []

        if not passages:
            # Document with no passages — still include as a reference
            ref_idx += 1
            header = f"[{ref_idx}] 标题：《{title}》"
            if doc_number:
                header += f"\n     文号：{doc_number}"
            if issuing_org:
                header += f"\n     发文机关：{issuing_org}"
            if publish_date:
                header += f"\n     时间：{publish_date}"
            parts.append(header)
            citation_map.append({
                "doc_id": doc_id,
                "title": title,
                "doc_number": doc_number,
            })
            continue

        for p_idx, passage in enumerate(passages):
            ref_idx += 1
            clean_text = passage.replace("<em>", "").replace("</em>", "")

            # Try to get matching chunk metadata
            chunk = chunks[p_idx] if p_idx < len(chunks) else {}
            page = chunk.get("page_number")
            page_numbers = chunk.get("page_numbers") or []
            heading = chunk.get("heading_hierarchy") or []
            element_type = chunk.get("element_type") or ""
            chunk_index = chunk.get("chunk_index")

            # Build location hint for the LLM
            loc_parts: list[str] = []
            if page:
                loc_parts.append(f"第{page}页")
            elif page_numbers:
                loc_parts.append(f"第{'-'.join(str(p) for p in page_numbers)}页")
            if heading:
                loc_parts.append(" / ".join(heading))
            loc_hint = f" · {' · '.join(loc_parts)}" if loc_parts else ""

            header = f"[{ref_idx}] 《{title}》{loc_hint}"
            body = f"\n     ……{clean_text}……"
            parts.append(header + body)

            citation_map.append({
                "doc_id": doc_id,
                "title": title,
                "doc_number": doc_number,
                "page_number": page,
                "page_numbers": page_numbers,
                "heading_hierarchy": heading,
                "element_type": element_type,
                "chunk_index": chunk_index,
                "text": passage,  # Keep original with <em> for highlighting
            })

    return "\n\n".join(parts), citation_map


def _build_messages(
    question: str,
    context: str,
    history: list[dict[str, Any]],
    *,
    mode: str = "research",
) -> list[dict[str, str]]:
    """构建 LLM 对话消息列表：系统提示词 + 历史轮次 + 当前问题与上下文。

    Construct the messages list for the LLM."""
    system_prompt = RESEARCH_SYSTEM_PROMPT if mode == "research" else QA_SYSTEM_PROMPT
    followup_template = RESEARCH_FOLLOWUP_TEMPLATE if mode == "research" else QA_FOLLOWUP_TEMPLATE
    user_template = RESEARCH_USER_TEMPLATE if mode == "research" else QA_USER_TEMPLATE

    messages: list[dict[str, str]] = [
        {"role": "system", "content": system_prompt}
    ]

    # Append previous Q&A turns (multi-turn context)
    for turn in history:
        messages.append(
            {
                "role": "user",
                "content": followup_template.format(
                    question=turn["question"]
                ),
            }
        )
        messages.append({"role": "assistant", "content": turn["answer"]})

    # Current question
    messages.append(
        {
            "role": "user",
            "content": user_template.format(
                question=question, context=context
            ),
        }
    )
    return messages
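
# Sketch of the resulting message shape for one prior turn (the actual
# templates come from qa_prompts / research_prompts):
#
#     msgs = _build_messages("新政策何时生效？", context="[1] ……",
#                            history=[{"question": "q0", "answer": "a0"}])
#     assert [m["role"] for m in msgs] == ["system", "user", "assistant", "user"]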


def _build_task_brief(task: ResearchTask) -> str:
    """将研究任务格式化为文本摘要，用于 LLM 输入。"""
    parts = [
        f"- 模式：{task.mode}",
        f"- 主题：{task.topic}",
        f"- 核心问题：{task.question}",
        f"- 输出模板：{task.output_template}",
        f"- 研究深度：{task.depth_level}",
    ]
    if task.goal:
        parts.append(f"- 研究目标：{task.goal}")
    if task.time_range:
        parts.append(f"- 时间范围：{task.time_range}")
    if task.region_scope:
        parts.append(f"- 地域范围：{task.region_scope}")
    if task.org_scope:
        parts.append(f"- 部门范围：{task.org_scope}")
    if task.required_doc_ids:
        parts.append(f"- 必须纳入文档：{', '.join(task.required_doc_ids[:8])}")
    if task.required_matter_ids:
        parts.append(f"- 关注事项：{', '.join(task.required_matter_ids[:8])}")
    # Inject template-specific output constraint
    from app.prompts.research_prompts import get_template_constraint

    constraint = get_template_constraint(task.output_template)
    if constraint:
        parts.append(f"\n## 输出风格要求\n{constraint}")
    return "\n".join(parts)


def _build_plan_brief(plan: ResearchPlan) -> str:
    """将研究计划格式化为文本摘要，用于 LLM 输入。"""
    parts = [f"- 计划摘要：{plan.summary}"]
    if plan.objectives:
        parts.append("- 研究目标：" + "；".join(plan.objectives))
    if plan.sub_questions:
        parts.append("- 子问题：" + "；".join(plan.sub_questions))
    if plan.retrieval_focus:
        parts.append("- 检索重点：" + "；".join(plan.retrieval_focus))
    if plan.section_outline:
        parts.append("- 章节结构：" + "；".join(plan.section_outline))
    if plan.notes:
        parts.append("- 风险提示：" + "；".join(plan.notes))
    return "\n".join(parts)


def _default_sections_for_template(template: str) -> list[str]:
    """根据输出模板类型返回默认章节大纲。"""
    mapping = {
        "policy_brief": ["结论摘要", "政策依据与演进", "当前执行要求", "风险与建议"],
        "timeline": ["阶段划分", "关键节点", "政策演进", "阶段性判断"],
        "comparison": ["比较维度", "共同点", "差异点", "结论建议"],
        "implementation": ["任务目标", "执行链条", "落地难点", "推进建议"],
        "comprehensive": ["研究结论", "政策依据", "执行要求", "风险与建议"],
        "briefing_outline": ["汇报背景", "核心结论", "主要依据", "重点问题", "建议动作", "需关注事项"],
        "speech_draft": ["开场背景", "总体判断", "当前重点", "风险和问题", "下一步安排", "结束强调"],
    }
    return mapping.get(template, mapping["comprehensive"])


def _default_deliverables(template: str) -> list[str]:
    """根据输出模板类型返回默认交付物列表。"""
    if template == "timeline":
        return ["时间线梳理", "阶段判断", "关键依据列表"]
    if template == "comparison":
        return ["比较矩阵", "差异点归纳", "决策建议"]
    if template == "briefing_outline":
        return ["汇报提纲", "结论要点", "依据附录"]
    if template == "speech_draft":
        return ["讲话稿正文", "要点提示", "背景材料摘要"]
    return ["执行摘要", "章节化研究报告", "引用附录"]


def _normalize_text(value: Any, fallback: str = "") -> str:
    """安全地将任意值转为字符串，去除首尾空白。"""
    if isinstance(value, str):
        return value.strip()
    return fallback


def _normalize_list(value: Any, *, fallback: list[str] | None = None, limit: int = 8) -> list[str]:
    """安全地将任意值转为字符串列表，过滤空值并限制长度。"""
    items: list[str] = []
    if isinstance(value, list):
        for item in value:
            if isinstance(item, str) and item.strip():
                items.append(item.strip())
    if not items and fallback:
        items = [item for item in fallback if item]
    return items[:limit]


def _unique_strings(values: list[str]) -> list[str]:
    """对字符串列表去重并保持顺序。"""
    return [value for value in dict.fromkeys(v.strip() for v in values if isinstance(v, str) and v.strip())]
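
# Sketch of the two list helpers:
#
#     assert _normalize_list([" a ", "", 3, "b"]) == ["a", "b"]
#     assert _normalize_list(None, fallback=["x"]) == ["x"]
#     assert _unique_strings([" a", "a", "b", ""]) == ["a", "b"]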


def _resolve_explicit_doc_ids(
    task: ResearchTask,
    plan: ResearchPlan,
    *,
    seed_doc_ids: list[str] | None = None,
    extra_doc_ids: list[str] | None = None,
) -> list[str]:
    """从任务和计划中合并显式纳入的文档 ID 列表。"""
    plan_doc_ids = list(plan.included_doc_ids or [])
    fallback_doc_ids = list(task.required_doc_ids) + list(seed_doc_ids or [])
    scoped_doc_ids = plan_doc_ids or fallback_doc_ids
    return _unique_strings(list(extra_doc_ids or []) + scoped_doc_ids)
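
# Sketch of the precedence with hypothetical IDs: if plan.included_doc_ids is
# ["p1"], task.required_doc_ids is ["t1"], seed_doc_ids is ["s1"], and
# extra_doc_ids is ["x1"], the plan scope shadows the task/seed fallback and
# the result is ["x1", "p1"].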


def _normalize_research_plan(
    task: ResearchTask,
    result: dict[str, Any],
    *,
    keywords: list[str],
    planner_plan: Any,
    seed_doc_ids: list[str] | None,
) -> ResearchPlan:
    """标准化 LLM 返回的研究计划：填充缺失字段、合并图谱规划信息、添加安全回退默认值。"""
    retrieval_focus = _normalize_list(result.get("retrieval_focus"), fallback=keywords or [task.topic])

    if getattr(planner_plan, "doc_code", None):
        retrieval_focus.append(f"文号：{planner_plan.doc_code}")
    if getattr(planner_plan, "entity_name", None):
        retrieval_focus.append(f"实体：{planner_plan.entity_name}")
    if getattr(planner_plan, "matter_query", None):
        retrieval_focus.append(f"事项：{planner_plan.matter_query}")

    notes = _normalize_list(result.get("notes"), fallback=[])
    if seed_doc_ids:
        notes.append(f"已显式纳入 {len(seed_doc_ids)} 份资料，应优先核对这些材料。")
    if task.required_matter_ids:
        notes.append("已指定事项范围，需结合事项办理依据核查研究结论。")

    return ResearchPlan(
        summary=_normalize_text(
            result.get("summary"),
            fallback=f"围绕“{task.topic}”拆解研究问题、检索依据链并生成结构化研究报告。",
        ),
        objectives=_normalize_list(
            result.get("objectives"),
            fallback=[task.goal or task.question, "识别关键依据链条", "归纳执行要求与风险"],
        ),
        sub_questions=_normalize_list(
            result.get("sub_questions"),
            fallback=[
                task.question,
                "现行政策依据链条如何构成？",
                "执行环节有哪些重点和风险？",
            ],
        ),
        retrieval_focus=_unique_strings(retrieval_focus)[:8],
        section_outline=_normalize_list(
            result.get("section_outline"),
            fallback=_default_sections_for_template(task.output_template),
            limit=3 if task.mode == "quick" else 8,
        ),
        expected_deliverables=_normalize_list(
            result.get("expected_deliverables"),
            fallback=_default_deliverables(task.output_template),
        ),
        notes=_unique_strings(notes)[:6],
        included_doc_ids=_unique_strings(list(task.required_doc_ids) + list(seed_doc_ids or [])),
        included_matter_ids=_unique_strings(list(task.required_matter_ids)),
    )


def _build_task_query(task: ResearchTask, plan: ResearchPlan) -> str:
    """将研究任务和计划合成为检索查询文本。"""
    parts = [task.topic, task.question]
    if task.goal:
        parts.append(task.goal)
    if task.time_range:
        parts.append(f"时间范围：{task.time_range}")
    if task.region_scope:
        parts.append(f"地域范围：{task.region_scope}")
    if task.org_scope:
        parts.append(f"部门范围：{task.org_scope}")
    if plan.retrieval_focus:
        parts.append("检索重点：" + "；".join(plan.retrieval_focus[:4]))
    return "\n".join(part for part in parts if part)


def _build_deep_research_messages(
    task: ResearchTask,
    plan: ResearchPlan,
    context: str,
    history: list[dict[str, Any]],
) -> list[dict[str, str]]:
    """构建深度研究报告生成的 LLM 消息列表。"""
    messages: list[dict[str, str]] = [
        {"role": "system", "content": DEEP_RESEARCH_REPORT_SYSTEM}
    ]

    for turn in history:
        messages.append(
            {
                "role": "user",
                "content": RESEARCH_FOLLOWUP_TEMPLATE.format(question=turn.get("question", "")),
            }
        )
        messages.append({"role": "assistant", "content": turn.get("answer", "")})

    messages.append(
        {
            "role": "user",
            "content": DEEP_RESEARCH_REPORT_USER.format(
                task_brief=_build_task_brief(task),
                plan_brief=_build_plan_brief(plan),
                context=context,
            ),
        }
    )
    return messages


def _build_final_document_messages(
    task: ResearchTask | dict[str, Any],
    plan: ResearchPlan | dict[str, Any],
    report: dict[str, Any],
    reference_docs: list[dict[str, Any]],
    *,
    output_template: str | None = None,
    chat_history: list[dict[str, Any]] | None = None,
    extra_materials: list[dict[str, Any]] | None = None,
) -> list[dict[str, str]]:
    """构建交付物文档生成的 LLM 消息列表。

    Parameters
    ----------
    chat_history:
        研究完成后的对话记录，重新生成文档时提供。
    extra_materials:
        后续上传的文件材料（item_type=uploaded_file），重新生成时纳入。
    """
    from app.prompts.research_prompts import (
        FINAL_DOCUMENT_SYSTEM, FINAL_DOCUMENT_USER,
        TEMPLATE_CONSTRAINTS, TEMPLATE_LABELS,
    )

    # Build system prompt with template-specific constraints
    system_prompt = FINAL_DOCUMENT_SYSTEM
    if output_template and output_template in TEMPLATE_CONSTRAINTS:
        label = TEMPLATE_LABELS.get(output_template, output_template)
        constraint = TEMPLATE_CONSTRAINTS[output_template]
        system_prompt += f"\n\n**当前输出模板：{label}**\n{constraint}"

    # Build structured report summary for the LLM
    structured_parts = []
    if report.get("executive_summary"):
        structured_parts.append(f"### 执行摘要\n{report['executive_summary']}")
    for f in report.get("findings", []):
        structured_parts.append(
            f"### 关键发现: {f['title']}\n强度: {f.get('strength', 'medium')}\n{f['content']}"
        )
    for c in report.get("conflicts", []):
        structured_parts.append(
            f"### 冲突/风险: {c['title']}\n严重性: {c.get('severity', 'medium')}\n{c['content']}"
        )
    for q in report.get("open_questions", []):
        structured_parts.append(f"### 待确认: {q['question']}\n原因: {q['reason']}")
    for s in report.get("sections", []):
        structured_parts.append(f"### 章节: {s['title']}\n摘要: {s.get('summary', '')}\n{s['content']}")
    if report.get("one_page_summary"):
        structured_parts.append(f"### 一页式摘要\n{report['one_page_summary']}")
    if report.get("recommended_next_steps"):
        structured_parts.append(
            "### 建议后续方向\n" + "\n".join(f"- {s}" for s in report["recommended_next_steps"])
        )
    structured_report = "\n\n".join(structured_parts)

    # Build reference list
    ref_lines = []
    for i, doc in enumerate(reference_docs, 1):
        title = doc.get("title", "")
        doc_number = doc.get("doc_number", "")
        label = f"[{i}] {title}"
        if doc_number:
            label += f" ({doc_number})"
        ref_lines.append(label)
    references_text = "\n".join(ref_lines) if ref_lines else "（无参考材料）"

    # Build task/plan briefs (support both dataclass and dict)
    if isinstance(task, dict):
        task_brief = f"主题: {task.get('topic', '')}\n问题: {task.get('question', '')}\n目标: {task.get('goal', '')}"
    else:
        task_brief = _build_task_brief(task)

    if isinstance(plan, dict):
        plan_brief = f"概述: {plan.get('summary', '')}"
        if plan.get("objectives"):
            plan_brief += "\n目标: " + "; ".join(plan["objectives"])
        if plan.get("section_outline"):
            plan_brief += "\n章节: " + "; ".join(plan["section_outline"])
    else:
        plan_brief = _build_plan_brief(plan)

    user_content = FINAL_DOCUMENT_USER.format(
        task_brief=task_brief,
        plan_brief=plan_brief,
        structured_report=structured_report,
        references=references_text,
    )

    # Append chat history context if available
    if chat_history:
        chat_lines = []
        for m in chat_history[-20:]:  # Last 20 messages
            role = "用户" if m.get("role") == "user" else "助手"
            chat_lines.append(f"{role}: {m.get('content', '')}")
        user_content += "\n\n## 研究完成后的补充对话\n" + "\n".join(chat_lines)

    # Append extra uploaded materials
    if extra_materials:
        mat_parts = []
        for mat in extra_materials:
            text = mat.get("extracted_text", "")
            if text:
                title = mat.get("title", "未命名文件")
                mat_parts.append(f"### 补充材料: {title}\n{text[:5000]}")
        if mat_parts:
            user_content += "\n\n## 后续补充材料\n" + "\n\n".join(mat_parts)

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]


def _build_section_hint(section_title: str, section_summary: str | None) -> str:
    """构建章节重跑的提示文本。"""
    parts = [f"目标章节：{section_title}"]
    if section_summary:
        parts.append(f"当前摘要：{section_summary}")
    return "\n".join(parts)


def _normalize_source_indices(
    value: Any,
    docs: list[dict[str, Any]],
    citation_map: list[dict[str, Any]] | None = None,
) -> list[str]:
    """将 LLM 返回的引用序号列表映射为 doc_id 列表。

    When *citation_map* is provided, indices refer to passage-level citations;
    otherwise they refer to the legacy document-level ordering in *docs*.
    """
    if not isinstance(value, list):
        return []
    lookup = citation_map if citation_map is not None else docs
    doc_ids: list[str] = []
    for item in value:
        if not isinstance(item, int):
            continue
        if 1 <= item <= len(lookup):
            doc_ids.append(lookup[item - 1].get("doc_id", ""))
    return _unique_strings(doc_ids)
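
# Sketch with a hypothetical lookup: indices are 1-based into citation_map when
# provided (docs otherwise); out-of-range and non-int entries are ignored.
#
#     docs = [{"doc_id": "d1"}, {"doc_id": "d2"}]
#     assert _normalize_source_indices([2, 2, 99, "1"], docs) == ["d2"]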


def _normalize_research_report(
    result: dict[str, Any],
    docs: list[dict[str, Any]],
    plan: ResearchPlan,
    citation_map: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """标准化 LLM 返回的研究报告：解析发现、冲突、待解问题和章节，将文档序号映射为 doc_id。

    When *citation_map* is given, ``source_indices`` are passage-level and the
    map is embedded in the returned report for frontend chunk-level preview.
    """
    findings: list[dict[str, Any]] = []
    for item in result.get("findings", [])[:8]:
        if not isinstance(item, dict):
            continue
        content = _normalize_text(item.get("content"))
        if not content:
            continue
        findings.append(
            {
                "title": _normalize_text(item.get("title"), fallback="关键发现"),
                "content": content,
                "strength": _normalize_text(item.get("strength"), fallback="medium") or "medium",
                "source_doc_ids": _normalize_source_indices(item.get("source_indices"), docs, citation_map),
            }
        )

    conflicts: list[dict[str, Any]] = []
    for item in result.get("conflicts", [])[:6]:
        if not isinstance(item, dict):
            continue
        content = _normalize_text(item.get("content"))
        if not content:
            continue
        conflicts.append(
            {
                "title": _normalize_text(item.get("title"), fallback="证据冲突或缺口"),
                "content": content,
                "severity": _normalize_text(item.get("severity"), fallback="medium") or "medium",
                "source_doc_ids": _normalize_source_indices(item.get("source_indices"), docs, citation_map),
            }
        )

    open_questions: list[dict[str, str]] = []
    for item in result.get("open_questions", [])[:6]:
        if not isinstance(item, dict):
            continue
        question = _normalize_text(item.get("question"))
        reason = _normalize_text(item.get("reason"))
        if question:
            open_questions.append({"question": question, "reason": reason})

    sections: list[dict[str, Any]] = []
    for item in result.get("sections", [])[:8]:
        if not isinstance(item, dict):
            continue
        content = _normalize_text(item.get("content"))
        if not content:
            continue
        sections.append(
            {
                "title": _normalize_text(item.get("title"), fallback="研究章节"),
                "summary": _normalize_text(item.get("summary")),
                "content": content,
                "source_doc_ids": _normalize_source_indices(item.get("source_indices"), docs, citation_map),
            }
        )

    if not sections:
        sections = [
            {
                "title": title,
                "summary": "",
                "content": "当前轮研究已完成检索，但结构化章节生成不足，请结合执行摘要与证据工作区继续补充。",
                "source_doc_ids": [],
            }
            for title in plan.section_outline[:4]
        ]

    return {
        "executive_summary": _normalize_text(result.get("executive_summary")),
        "findings": findings,
        "conflicts": conflicts,
        "open_questions": open_questions,
        "sections": sections,
        "one_page_summary": _normalize_text(result.get("one_page_summary")),
        "recommended_next_steps": _normalize_list(result.get("recommended_next_steps"), fallback=[]),
        "citation_map": citation_map or [],
    }
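
# Sketch of graceful degradation, assuming a ResearchPlan instance `plan`:
# findings without content are dropped, and an empty sections list falls back
# to placeholder sections built from plan.section_outline[:4].
#
#     report = _normalize_research_report({"findings": [{"title": "x"}]},
#                                         docs=[], plan=plan)
#     # report["findings"] == [] and each fallback section carries the
#     # placeholder content defined above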


def _normalize_section_rerun_result(
    result: dict[str, Any],
    docs: list[dict[str, Any]],
    section_title: str,
) -> dict[str, Any]:
    """标准化章节重跑的 LLM 返回结果。"""
    raw_section = result.get("section") if isinstance(result, dict) else {}
    if not isinstance(raw_section, dict):
        raw_section = {}

    content = _normalize_text(
        raw_section.get("content"),
        fallback="当前轮局部重跑未生成有效章节内容，请补充证据范围后重试。",
    )

    return {
        "section": {
            "title": _normalize_text(raw_section.get("title"), fallback=section_title),
            "summary": _normalize_text(raw_section.get("summary")),
            "content": content,
            "source_doc_ids": _normalize_source_indices(raw_section.get("source_indices"), docs),
        },
        "notes": _normalize_list(result.get("notes"), fallback=[]),
    }


def _source_group_label(source_group: str) -> str:
    """将源分组标识符映射为中文标签。"""
    mapping = {
        "seed": "显式纳入资料",
        "search": "混合检索命中",
        "graph": "图谱补充材料",
        "guide": "办事指南结构化证据",
    }
    return mapping.get(source_group, "混合检索命中")


def _join_report_text(report: dict[str, Any]) -> str:
    """将结构化研究报告拼接为纯文本，用于会话存储。"""
    parts: list[str] = []
    if report.get("executive_summary"):
        parts.append(report["executive_summary"])
    for section in report.get("sections", []):
        title = section.get("title") or "研究章节"
        content = section.get("content") or ""
        parts.append(f"## {title}\n\n{content}")
    if report.get("one_page_summary"):
        parts.append("## 一页式摘要\n\n" + report["one_page_summary"])
    return "\n\n".join(part for part in parts if part)


def _join_section_text(section: dict[str, Any]) -> str:
    """将单个章节拼接为纯文本，用于会话存储。"""
    title = section.get("title") or "研究章节"
    content = section.get("content") or ""
    return f"## {title}\n\n{content}".strip()
