"""Rule-first graph query planner for Research mode.

Uses the already extracted question keywords to route questions to specific
GraphQueryService methods, then formats the returned graph data into concise
Chinese evidence blocks for the LLM context.

基于规则的图谱查询规划器模块。
在 Research 模式中，根据用户问题的关键词和句式特征，将问题分类为
不同的查询意图（政策链追溯/修订历史/实体文档/事项详情/主题探索），
然后调用对应的 GraphQueryService 方法获取图谱证据，
并将结果格式化为简洁的中文证据文本注入 LLM 上下文。
"""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Any

from app.core.graph_query_service import GraphQueryService
from app.prompts.query_planning import (
    BUDGET_CARD_TEMPLATE,
    DATA_RESOURCE_CARD_TEMPLATE,
    ENTITY_DOCS_TEMPLATE,
    INDICATOR_CARD_TEMPLATE,
    INDUSTRY_CARD_TEMPLATE,
    MATTER_CARD_TEMPLATE,
    POLICY_CARD_TEMPLATE,
    POLICY_CHAIN_TEMPLATE,
    PROJECT_CARD_TEMPLATE,
    REVISION_HISTORY_TEMPLATE,
    SYSTEM_CARD_TEMPLATE,
    TASK_CARD_TEMPLATE,
    THEME_DOCS_TEMPLATE,
)
from app.utils.doc_code import extract_first_doc_code
from app.utils.logger import get_logger

logger = get_logger(__name__)


class QueryIntent(str, Enum):
    """Supported rule-based graph query intents.

    Enumerates every intent the planner can assign to a question. Members
    also derive from ``str``, so each one compares equal to its raw string
    value.
    """

    # Intents that operate on a single anchor document.
    POLICY_CHAIN = "policy_chain"
    REVISION_HISTORY = "revision_history"
    # Intents that look up documents or matters by a named entity/theme.
    ENTITY_DOCS = "entity_docs"
    MATTER_DETAIL = "matter_detail"
    THEME_EXPLORE = "theme_explore"
    # One "detail" intent per core entity label.
    POLICY_DETAIL = "policy_detail"
    TASK_DETAIL = "task_detail"
    PROJECT_DETAIL = "project_detail"
    SYSTEM_DETAIL = "system_detail"
    DATA_RESOURCE_DETAIL = "data_resource_detail"
    BUDGET_DETAIL = "budget_detail"
    INDICATOR_DETAIL = "indicator_detail"
    INDUSTRY_DETAIL = "industry_detail"
    # Fallback intent.
    GENERAL = "general"


@dataclass(slots=True)
class QueryPlan:
    """Planner output for graph evidence collection.

    Only ``intent`` is required; every other field defaults to ``None``.
    """

    # Classified intent for the question.
    intent: QueryIntent
    # Graph document ID, when already known.
    doc_id: str | None = None
    # Document code extracted from the question text, if any.
    doc_code: str | None = None
    # Name of the entity (or theme) the question is about.
    entity_name: str | None = None
    # Graph label associated with ``entity_name`` (e.g. "Organization").
    entity_label: str | None = None
    # Search text used to find an administrative matter.
    matter_query: str | None = None


@dataclass(slots=True)
class GraphEvidence:
    """Structured graph evidence plus supporting document IDs.

    A default-constructed instance (empty text, empty list) represents
    "no evidence".
    """

    # Formatted evidence text.
    text: str = ""
    # IDs of the documents backing the evidence; a fresh list per instance.
    doc_ids: list[str] = field(default_factory=list)


# Suffix heuristics: the `$` anchor means only the END of the text is checked.
# Text ending in an organization-type suffix.
_ORG_HINT_RE = re.compile(
    r"(政府|局|委|办|厅|部|院|署|中心|办公室|党组|协会|集团|银行)$"
)
# Text ending in an administrative-region suffix.
_REGION_HINT_RE = re.compile(r"(省|市|县|区|镇|乡|村)$")
# Text that is, in its entirety, a time expression (anchored at both ends).
_TIME_HINT_RE = re.compile(r"^(\d{4}年|近\d+年|近年来|目前|当前)$")

# Intent trigger patterns: a question matches an intent when any pattern in
# the corresponding list is found anywhere in the text.
_POLICY_CHAIN_PATTERNS = [
    re.compile(r"依据|上位法|法律基础|制定依据|法规依据"),
]
_REVISION_HISTORY_PATTERNS = [
    re.compile(r"修订|修改|废止|历史沿革|变更|替代|取代"),
]
_ENTITY_DOCS_PATTERNS = [
    re.compile(r"(发布|出台|制定|印发).*(文件|政策|规定)"),
]
_MATTER_DETAIL_PATTERNS = [
    re.compile(r"办理|申请|材料|条件|时限|流程|需要什么|需要哪些|怎么办|如何办理"),
]
_THEME_EXPLORE_PATTERNS = [
    re.compile(r"相关政策|同类|主题|领域|专项"),
]

# Entity-label hint patterns: keyword vocabularies used to guess which core
# entity label a piece of text refers to.
_PROJECT_HINT_PATTERNS = [
    re.compile(r"项目|工程|建设|采购|试点"),
]
_POLICY_HINT_PATTERNS = [
    re.compile(r"政策|措施|办法|规定|奖补"),
]
_TASK_HINT_PATTERNS = [
    re.compile(r"任务|工作|推进|整改|督办|落实"),
]
_SYSTEM_HINT_PATTERNS = [
    re.compile(r"平台|系统|应用|门户|业务系统"),
]
_DATA_RESOURCE_HINT_PATTERNS = [
    re.compile(r"数据资源|数据集|数据库|资源目录|证照库"),
]
_BUDGET_HINT_PATTERNS = [
    re.compile(r"预算|经费|资金|专项资金|财政投入"),
]
_INDICATOR_HINT_PATTERNS = [
    re.compile(r"指标|考核|目标值|KPI|评价指标"),
]
_INDUSTRY_HINT_PATTERNS = [
    re.compile(r"产业|行业|产业链|产业集群"),
]

# Maps an inferred entity label to the detail intent that handles it.
# Labels absent from this map (e.g. "Organization", "Region") have no
# dedicated detail intent.
_LABEL_TO_DETAIL_INTENT: dict[str, QueryIntent] = {
    "Project": QueryIntent.PROJECT_DETAIL,
    "Policy": QueryIntent.POLICY_DETAIL,
    "Task": QueryIntent.TASK_DETAIL,
    "System": QueryIntent.SYSTEM_DETAIL,
    "DataResource": QueryIntent.DATA_RESOURCE_DETAIL,
    "Budget": QueryIntent.BUDGET_DETAIL,
    "Indicator": QueryIntent.INDICATOR_DETAIL,
    "Industry": QueryIntent.INDUSTRY_DETAIL,
}

# Capture patterns: each exposes a `name` group holding 2-40 CJK/alphanumeric
# characters (matched lazily) that immediately precede a trigger phrase.
_MATTER_CAPTURE_PATTERNS = [
    re.compile(
        r"(?P<name>[\u4e00-\u9fffA-Za-z0-9]{2,40}?)"
        r"(?:需要什么材料|需要哪些材料|需要什么条件|需要哪些条件|办理条件|办理时限|办理流程|申请材料|申请条件|怎么办|如何办理|流程)"
    ),
]
_ENTITY_CAPTURE_PATTERNS = [
    re.compile(
        r"(?P<name>[\u4e00-\u9fffA-Za-z0-9]{2,40}?)"
        r"(?:发布|出台|制定|印发).*(?:文件|政策|规定)"
    ),
]
_THEME_CAPTURE_PATTERNS = [
    re.compile(
        r"(?P<name>[\u4e00-\u9fffA-Za-z0-9]{2,40}?)"
        r"(?:相关政策|同类政策|主题相关文件|主题政策|领域政策|专项政策|主题|领域|专项)"
    ),
]


class GraphQueryPlanner:
    """Rule-first graph intent classifier and evidence collector.

    规则优先的图谱查询规划器。
    通过正则模式匹配将自然语言问题分类为查询意图，
    再执行对应的图谱查询并格式化为 LLM 可用的证据文本。
    """

    def plan(self, question: str, keywords: list[str]) -> QueryPlan:
        """Classify the question into a graph query plan.

        按优先级依次匹配：政策链 > 修订历史 > 实体文档 > 事项详情 > 主题探索 > 通用。
        """
        text = question.strip()
        doc_code = self._extract_doc_code(text)

        if self._matches_any(text, _POLICY_CHAIN_PATTERNS):
            return QueryPlan(intent=QueryIntent.POLICY_CHAIN, doc_code=doc_code)

        if self._matches_any(text, _REVISION_HISTORY_PATTERNS):
            return QueryPlan(intent=QueryIntent.REVISION_HISTORY, doc_code=doc_code)

        if self._matches_any(text, _ENTITY_DOCS_PATTERNS):
            entity_name = self._extract_named_segment(text, _ENTITY_CAPTURE_PATTERNS)
            entity_name = entity_name or self._pick_entity_candidate(keywords)
            entity_label = self._infer_entity_label(entity_name)
            return QueryPlan(
                intent=QueryIntent.ENTITY_DOCS,
                entity_name=entity_name,
                entity_label=entity_label,
            )

        # Specific intent patterns take priority over generic label inference
        if self._matches_any(text, _MATTER_DETAIL_PATTERNS):
            matter_query = self._extract_named_segment(text, _MATTER_CAPTURE_PATTERNS)
            matter_query = matter_query or self._pick_matter_candidate(keywords)
            return QueryPlan(
                intent=QueryIntent.MATTER_DETAIL,
                matter_query=matter_query,
            )

        if self._matches_any(text, _THEME_EXPLORE_PATTERNS):
            entity_name = self._extract_named_segment(text, _THEME_CAPTURE_PATTERNS)
            entity_name = entity_name or self._pick_theme_candidate(keywords)
            return QueryPlan(
                intent=QueryIntent.THEME_EXPLORE,
                doc_code=doc_code,
                entity_name=entity_name,
                entity_label="PolicyTheme",
            )

        # Generic entity label inference → detail intents
        inferred_label = self._infer_entity_label(text)
        if inferred_label in _LABEL_TO_DETAIL_INTENT:
            entity_name = self._pick_generic_candidate(keywords) or text
            return QueryPlan(
                intent=_LABEL_TO_DETAIL_INTENT[inferred_label],
                entity_name=entity_name,
                entity_label=inferred_label,
            )

        return QueryPlan(intent=QueryIntent.GENERAL)

    async def collect_evidence(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        """Execute the plan and format graph evidence for LLM context."""
        try:
            if plan.intent == QueryIntent.POLICY_CHAIN:
                return await self._collect_policy_chain(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.REVISION_HISTORY:
                return await self._collect_revision_history(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.ENTITY_DOCS:
                return await self._collect_entity_docs(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.MATTER_DETAIL:
                return await self._collect_matter_detail(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.THEME_EXPLORE:
                return await self._collect_theme_docs(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.POLICY_DETAIL:
                return await self._collect_policy_detail(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.TASK_DETAIL:
                return await self._collect_task_detail(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.PROJECT_DETAIL:
                return await self._collect_project_detail(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.SYSTEM_DETAIL:
                return await self._collect_system_detail(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.DATA_RESOURCE_DETAIL:
                return await self._collect_data_resource_detail(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.BUDGET_DETAIL:
                return await self._collect_budget_detail(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.INDICATOR_DETAIL:
                return await self._collect_indicator_detail(plan, graph_service, acl_tokens=acl_tokens)

            if plan.intent == QueryIntent.INDUSTRY_DETAIL:
                return await self._collect_industry_detail(plan, graph_service, acl_tokens=acl_tokens)
        except Exception as exc:
            logger.warning(
                "graph_evidence_collect_failed",
                intent=plan.intent.value,
                error=str(exc),
            )

        return GraphEvidence()

    async def _collect_policy_chain(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        doc_id = await self._resolve_doc_id(plan, graph_service, acl_tokens=acl_tokens)
        if not doc_id:
            return GraphEvidence()

        data = await self._call_graph(
            graph_service.get_policy_chain,
            doc_id,
            max_depth=5,
            acl_tokens=acl_tokens,
        )
        chain = data.get("chain") or []
        edges = data.get("edges") or []
        if not chain:
            return GraphEvidence()

        text = POLICY_CHAIN_TEMPLATE.format(
            chain=self._format_policy_chain(chain, edges)
        )
        return GraphEvidence(text=text, doc_ids=self._doc_ids_from_items(chain))

    async def _collect_revision_history(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        doc_id = await self._resolve_doc_id(plan, graph_service, acl_tokens=acl_tokens)
        if not doc_id:
            return GraphEvidence()

        data = await self._call_graph(
            graph_service.get_revision_history,
            doc_id,
            max_depth=5,
            acl_tokens=acl_tokens,
        )
        documents = data.get("documents") or []
        edges = data.get("edges") or []
        if not documents:
            return GraphEvidence()

        text = REVISION_HISTORY_TEMPLATE.format(
            history=self._format_revision_history(documents, edges)
        )
        return GraphEvidence(text=text, doc_ids=self._doc_ids_from_items(documents))

    async def _collect_entity_docs(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        if not plan.entity_name:
            return GraphEvidence()

        resolved = await self._resolve_entity(
            graph_service,
            plan.entity_name,
            plan.entity_label,
            acl_tokens=acl_tokens,
        )
        if not resolved:
            return GraphEvidence()

        entity_name, entity_label = resolved
        docs = await self._call_graph(
            graph_service.get_docs_by_entity,
            entity_name,
            entity_label,
            limit=8,
            acl_tokens=acl_tokens,
        )
        if not docs:
            return GraphEvidence()

        text = ENTITY_DOCS_TEMPLATE.format(
            entity_name=entity_name,
            docs=self._format_docs_list(docs),
        )
        return GraphEvidence(text=text, doc_ids=self._doc_ids_from_items(docs))

    async def _collect_matter_detail(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        if not plan.matter_query:
            return GraphEvidence()

        matters = await self._call_graph(
            graph_service.search_matters,
            plan.matter_query,
            limit=5,
            acl_tokens=acl_tokens,
        )
        match = self._pick_best_named_item(matters, plan.matter_query)
        if not match:
            return GraphEvidence()

        card = await self._call_graph(
            graph_service.get_matter_card,
            match["matter_id"],
            acl_tokens=acl_tokens,
        )
        if not card:
            return GraphEvidence()

        text = MATTER_CARD_TEMPLATE.format(
            name=card.get("name") or plan.matter_query,
            conditions=self._format_prop_names(card.get("conditions"), empty="未提取到明确条件"),
            materials=self._format_prop_names(card.get("materials"), empty="未提取到明确材料"),
            time_limits=self._format_prop_names(card.get("time_limits"), empty="未提取到明确时限"),
            handled_by=self._format_prop_names(card.get("handled_by"), empty="未提取到明确办理机构"),
            governing_docs=self._format_gov_docs(card.get("governing_docs")),
        )
        return GraphEvidence(
            text=text,
            doc_ids=self._doc_ids_from_items(card.get("governing_docs") or []),
        )

    async def _collect_theme_docs(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        doc_id = await self._resolve_doc_id(plan, graph_service, acl_tokens=acl_tokens)
        if doc_id:
            docs = await self._call_graph(
                graph_service.get_same_theme_documents,
                doc_id,
                limit=8,
                acl_tokens=acl_tokens,
            )
            if docs:
                theme_name = docs[0].get("theme_name") or (plan.entity_name or "同主题")
                text = THEME_DOCS_TEMPLATE.format(
                    theme=theme_name,
                    docs=self._format_docs_list(docs),
                )
                return GraphEvidence(text=text, doc_ids=self._doc_ids_from_items(docs))

        if not plan.entity_name:
            return GraphEvidence()

        resolved = await self._resolve_entity(
            graph_service,
            plan.entity_name,
            "PolicyTheme",
            acl_tokens=acl_tokens,
        )
        if not resolved:
            return GraphEvidence()

        theme_name, _ = resolved
        docs = await self._call_graph(
            graph_service.get_docs_by_entity,
            theme_name,
            "PolicyTheme",
            limit=8,
            acl_tokens=acl_tokens,
        )
        if not docs:
            return GraphEvidence()

        text = THEME_DOCS_TEMPLATE.format(
            theme=theme_name,
            docs=self._format_docs_list(docs),
        )
        return GraphEvidence(text=text, doc_ids=self._doc_ids_from_items(docs))

    async def _collect_policy_detail(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        if not plan.entity_name:
            return GraphEvidence()

        entity_key = await self._call_graph(
            graph_service.resolve_entity_by_exact_name,
            plan.entity_name,
            "Policy",
            acl_tokens=acl_tokens,
        )
        if not entity_key:
            return GraphEvidence()

        card = await self._call_graph(
            graph_service.get_policy_card,
            entity_key,
            acl_tokens=acl_tokens,
        )
        if not card:
            return GraphEvidence()

        text = POLICY_CARD_TEMPLATE.format(
            name=card.get("name") or plan.entity_name,
            summary=card.get("summary") or "无",
            assigned_orgs=self._format_name_list(card.get("assigned_orgs")),
            implementing_tasks=self._format_ref_names(card.get("implementing_tasks")),
            supported_projects=self._format_ref_names(card.get("supported_projects")),
            related_themes=self._format_name_list(card.get("related_themes")),
            source_docs=self._format_card_docs(card.get("source_docs")),
        )
        return GraphEvidence(
            text=text,
            doc_ids=self._doc_ids_from_items(card.get("source_docs") or []),
        )

    async def _collect_task_detail(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        if not plan.entity_name:
            return GraphEvidence()

        entity_key = await self._call_graph(
            graph_service.resolve_entity_by_exact_name,
            plan.entity_name,
            "Task",
            acl_tokens=acl_tokens,
        )
        if not entity_key:
            return GraphEvidence()

        card = await self._call_graph(
            graph_service.get_task_card,
            entity_key,
            acl_tokens=acl_tokens,
        )
        if not card:
            return GraphEvidence()

        text = TASK_CARD_TEMPLATE.format(
            name=card.get("name") or plan.entity_name,
            lead_orgs=self._format_name_list(card.get("lead_orgs")),
            assist_orgs=self._format_name_list(card.get("assist_orgs")),
            implementing_policies=self._format_ref_names(card.get("implementing_policies")),
            evaluating_indicators=self._format_name_list(card.get("evaluating_indicators")),
            budgets=self._format_budget_list(card.get("budgets")),
            source_docs=self._format_card_docs(card.get("source_docs")),
        )
        return GraphEvidence(
            text=text,
            doc_ids=self._doc_ids_from_items(card.get("source_docs") or []),
        )

    async def _collect_project_detail(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        if not plan.entity_name:
            return GraphEvidence()

        entity_key = await self._call_graph(
            graph_service.resolve_entity_by_exact_name,
            plan.entity_name,
            "Project",
            acl_tokens=acl_tokens,
        )
        if not entity_key:
            return GraphEvidence()

        card = await self._call_graph(
            graph_service.get_project_card,
            entity_key,
            acl_tokens=acl_tokens,
        )
        if not card:
            return GraphEvidence()

        text = PROJECT_CARD_TEMPLATE.format(
            name=card.get("name") or plan.entity_name,
            implementing_orgs=self._format_name_list(card.get("implementing_orgs")),
            locations=self._format_name_list(card.get("locations")),
            supporting_policies=self._format_ref_names(card.get("supporting_policies")),
            budgets=self._format_budget_list(card.get("budgets")),
            technologies=self._format_name_list(card.get("technologies")),
            evaluating_indicators=self._format_name_list(card.get("evaluating_indicators")),
            source_docs=self._format_card_docs(card.get("source_docs")),
        )
        return GraphEvidence(
            text=text,
            doc_ids=self._doc_ids_from_items(card.get("source_docs") or []),
        )

    async def _collect_system_detail(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        if not plan.entity_name:
            return GraphEvidence()

        entity_key = await self._call_graph(
            graph_service.resolve_entity_by_exact_name,
            plan.entity_name,
            "System",
            acl_tokens=acl_tokens,
        )
        if not entity_key:
            return GraphEvidence()

        card = await self._call_graph(
            graph_service.get_system_card,
            entity_key,
            acl_tokens=acl_tokens,
        )
        if not card:
            return GraphEvidence()

        text = SYSTEM_CARD_TEMPLATE.format(
            name=card.get("name") or plan.entity_name,
            operated_by=self._format_name_list(card.get("operated_by")),
            managed_data=self._format_name_list(card.get("managed_data")),
            technologies=self._format_name_list(card.get("technologies")),
        )
        return GraphEvidence(text=text)

    async def _collect_data_resource_detail(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        if not plan.entity_name:
            return GraphEvidence()

        entity_key = await self._call_graph(
            graph_service.resolve_entity_by_exact_name,
            plan.entity_name,
            "DataResource",
            acl_tokens=acl_tokens,
        )
        if not entity_key:
            return GraphEvidence()

        card = await self._call_graph(
            graph_service.get_data_resource_card,
            entity_key,
            acl_tokens=acl_tokens,
        )
        if not card:
            return GraphEvidence()

        text = DATA_RESOURCE_CARD_TEMPLATE.format(
            name=card.get("name") or plan.entity_name,
            managed_by_systems=self._format_name_list(card.get("managed_by_systems")),
            conforms_to=self._format_name_list(card.get("conforms_to")),
        )
        return GraphEvidence(text=text)

    async def _collect_budget_detail(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        if not plan.entity_name:
            return GraphEvidence()

        entity_key = await self._call_graph(
            graph_service.resolve_entity_by_exact_name,
            plan.entity_name,
            "Budget",
            acl_tokens=acl_tokens,
        )
        if not entity_key:
            return GraphEvidence()

        card = await self._call_graph(
            graph_service.get_budget_card,
            entity_key,
            acl_tokens=acl_tokens,
        )
        if not card:
            return GraphEvidence()

        text = BUDGET_CARD_TEMPLATE.format(
            name=card.get("name") or plan.entity_name,
            funded_tasks=self._format_ref_names(card.get("funded_tasks")),
            funded_projects=self._format_ref_names(card.get("funded_projects")),
            related_docs=self._format_related_docs(card.get("related_docs")),
        )
        return GraphEvidence(
            text=text,
            doc_ids=self._doc_ids_from_items(card.get("related_docs") or []),
        )

    async def _collect_indicator_detail(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        if not plan.entity_name:
            return GraphEvidence()

        entity_key = await self._call_graph(
            graph_service.resolve_entity_by_exact_name,
            plan.entity_name,
            "Indicator",
            acl_tokens=acl_tokens,
        )
        if not entity_key:
            return GraphEvidence()

        card = await self._call_graph(
            graph_service.get_indicator_card,
            entity_key,
            acl_tokens=acl_tokens,
        )
        if not card:
            return GraphEvidence()

        text = INDICATOR_CARD_TEMPLATE.format(
            name=card.get("name") or plan.entity_name,
            evaluated_tasks=self._format_ref_names(card.get("evaluated_tasks")),
            evaluated_projects=self._format_ref_names(card.get("evaluated_projects")),
            related_docs=self._format_related_docs(card.get("related_docs")),
        )
        return GraphEvidence(
            text=text,
            doc_ids=self._doc_ids_from_items(card.get("related_docs") or []),
        )

    async def _collect_industry_detail(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> GraphEvidence:
        if not plan.entity_name:
            return GraphEvidence()

        entity_key = await self._call_graph(
            graph_service.resolve_entity_by_exact_name,
            plan.entity_name,
            "Industry",
            acl_tokens=acl_tokens,
        )
        if not entity_key:
            return GraphEvidence()

        card = await self._call_graph(
            graph_service.get_industry_card,
            entity_key,
            acl_tokens=acl_tokens,
        )
        if not card:
            return GraphEvidence()

        text = INDUSTRY_CARD_TEMPLATE.format(
            name=card.get("name") or plan.entity_name,
            supported_by_policies=self._format_ref_names(card.get("supported_by_policies")),
            located_in=self._format_name_list(card.get("located_in")),
            managed_by=self._format_name_list(card.get("managed_by")),
            related_docs=self._format_related_docs(card.get("related_docs")),
        )
        return GraphEvidence(
            text=text,
            doc_ids=self._doc_ids_from_items(card.get("related_docs") or []),
        )

    async def _resolve_doc_id(
        self,
        plan: QueryPlan,
        graph_service: GraphQueryService,
        *,
        acl_tokens: list[str] | None = None,
    ) -> str | None:
        if plan.doc_id:
            return plan.doc_id
        if plan.doc_code:
            return await self._call_graph(
                graph_service.find_doc_by_code,
                plan.doc_code,
                acl_tokens=acl_tokens,
            )
        return None

    async def _resolve_entity(
        self,
        graph_service: GraphQueryService,
        entity_name: str,
        entity_label: str | None,
        *,
        acl_tokens: list[str] | None = None,
    ) -> tuple[str, str] | None:
        entities = await self._call_graph(
            graph_service.search_entities,
            entity_name,
            label=entity_label,
            limit=5,
            acl_tokens=acl_tokens,
        )
        if not entities:
            return None

        best = self._pick_best_entity(entities, entity_name)
        if not best:
            return None

        labels = best.get("labels") or []
        props = best.get("properties") or {}
        name = props.get("name") or entity_name
        label = labels[0] if labels else (entity_label or "")
        if not label:
            return None
        return name, label

    async def _call_graph(
        self,
        func,
        *args,
        acl_tokens: list[str] | None = None,
        **kwargs,
    ):
        if acl_tokens is not None:
            kwargs["acl_tokens"] = acl_tokens
        return await func(*args, **kwargs)

    def _pick_best_entity(
        self,
        entities: list[dict[str, Any]],
        candidate: str,
    ) -> dict[str, Any] | None:
        if not entities:
            return None

        candidate_norm = candidate.strip()
        exact = [
            entity
            for entity in entities
            if (entity.get("properties") or {}).get("name") == candidate_norm
        ]
        return exact[0] if exact else entities[0]

    def _pick_best_named_item(
        self,
        items: list[dict[str, Any]],
        candidate: str,
    ) -> dict[str, Any] | None:
        if not items:
            return None

        candidate_norm = candidate.strip()
        exact = [item for item in items if (item.get("name") or "") == candidate_norm]
        if exact:
            return exact[0]

        containing = [
            item
            for item in items
            if candidate_norm and candidate_norm in (item.get("name") or "")
        ]
        return containing[0] if containing else items[0]

    def _matches_any(self, text: str, patterns: list[re.Pattern[str]]) -> bool:
        return any(pattern.search(text) for pattern in patterns)

    def _extract_doc_code(self, text: str) -> str | None:
        return extract_first_doc_code(text)

    def _extract_named_segment(
        self,
        text: str,
        patterns: list[re.Pattern[str]],
    ) -> str | None:
        for pattern in patterns:
            match = pattern.search(text)
            if match:
                name = match.group("name").strip(" ，。？?、")
                if name:
                    return name
        return None

    def _pick_entity_candidate(self, keywords: list[str]) -> str | None:
        entity_keywords = [kw for kw in keywords if self._infer_entity_label(kw) in {"Organization", "Region"}]
        if entity_keywords:
            return max(entity_keywords, key=len)
        filtered = [kw for kw in keywords if not _TIME_HINT_RE.search(kw)]
        return max(filtered, key=len) if filtered else None

    def _pick_matter_candidate(self, keywords: list[str]) -> str | None:
        return self._pick_generic_candidate(keywords)

    def _pick_theme_candidate(self, keywords: list[str]) -> str | None:
        return self._pick_generic_candidate(keywords)

    def _pick_generic_candidate(self, keywords: list[str]) -> str | None:
        """Pick the longest non-time, non-org/region keyword as a generic candidate."""
        filtered = [
            kw
            for kw in keywords
            if kw and not _TIME_HINT_RE.search(kw) and self._infer_entity_label(kw) is None
        ]
        return max(filtered, key=len) if filtered else None

    def _infer_entity_label(self, text: str | None) -> str | None:
        if not text:
            return None
        if _ORG_HINT_RE.search(text):
            return "Organization"
        if _REGION_HINT_RE.search(text):
            return "Region"
        # Sprint 4: disambiguate new core entity types
        if self._matches_any(text, _PROJECT_HINT_PATTERNS):
            return "Project"
        if self._matches_any(text, _POLICY_HINT_PATTERNS):
            return "Policy"
        if self._matches_any(text, _TASK_HINT_PATTERNS):
            return "Task"
        if self._matches_any(text, _SYSTEM_HINT_PATTERNS):
            return "System"
        if self._matches_any(text, _DATA_RESOURCE_HINT_PATTERNS):
            return "DataResource"
        if self._matches_any(text, _BUDGET_HINT_PATTERNS):
            return "Budget"
        if self._matches_any(text, _INDICATOR_HINT_PATTERNS):
            return "Indicator"
        if self._matches_any(text, _INDUSTRY_HINT_PATTERNS):
            return "Industry"
        return None

    def _format_policy_chain(
        self,
        chain: list[dict[str, Any]],
        edges: list[dict[str, Any]],
    ) -> str:
        doc_map = {doc["doc_id"]: doc for doc in chain if doc.get("doc_id")}
        lines: list[str] = []
        for edge in edges:
            src = doc_map.get(edge.get("from_doc_id"), {"doc_id": edge.get("from_doc_id")})
            tgt = doc_map.get(edge.get("to_doc_id"), {"doc_id": edge.get("to_doc_id")})
            lines.append(f"{self._format_doc_title(src)}\n  → 依据 → {self._format_doc_title(tgt)}")

        if not lines:
            lines = [self._format_doc_title(doc) for doc in chain]
        return "\n".join(lines)

    def _format_revision_history(
        self,
        documents: list[dict[str, Any]],
        edges: list[dict[str, Any]],
    ) -> str:
        doc_map = {doc["doc_id"]: doc for doc in documents if doc.get("doc_id")}
        rel_names = {"AMENDS": "修订", "REPEALS": "废止"}
        lines: list[str] = []
        for edge in edges:
            rel_name = rel_names.get(edge.get("rel_type"), edge.get("rel_type") or "关联")
            src = doc_map.get(edge.get("from_doc_id"), {"doc_id": edge.get("from_doc_id")})
            tgt = doc_map.get(edge.get("to_doc_id"), {"doc_id": edge.get("to_doc_id")})
            lines.append(f"- {self._format_doc_title(src)} → {rel_name} → {self._format_doc_title(tgt)}")

        if not lines:
            lines = [f"- {self._format_doc_title(doc)}" for doc in documents]
        return "\n".join(lines)

    def _format_docs_list(self, docs: list[dict[str, Any]]) -> str:
        lines: list[str] = []
        for doc in docs[:8]:
            title = doc.get("title") or "（无标题）"
            doc_number = doc.get("doc_number") or doc.get("doc_code") or ""
            rel_type = doc.get("rel_type") or ""
            publish_date = doc.get("publish_date") or ""
            extra: list[str] = []
            if doc_number:
                extra.append(doc_number)
            if publish_date:
                extra.append(str(publish_date))
            if rel_type:
                extra.append(rel_type)
            suffix = f"（{'；'.join(extra)}）" if extra else ""
            lines.append(f"- 《{title}》{suffix}")
        return "\n".join(lines)

    def _format_prop_names(
        self,
        items: list[dict[str, Any]] | None,
        *,
        empty: str,
    ) -> str:
        if not items:
            return empty
        names = [item.get("name") or item.get("full_name") for item in items]
        names = [name for name in names if name]
        return "；".join(names) if names else empty

    def _format_gov_docs(self, docs: list[dict[str, Any]] | None) -> str:
        if not docs:
            return "未提取到明确依据文件"
        parts: list[str] = []
        for doc in docs[:5]:
            title = doc.get("title") or "（无标题）"
            doc_code = doc.get("doc_code") or doc.get("doc_number") or ""
            parts.append(f"《{title}》{f'({doc_code})' if doc_code else ''}")
        return "；".join(parts)

    def _doc_ids_from_items(self, items: list[dict[str, Any]]) -> list[str]:
        ordered = [item.get("doc_id") for item in items if item.get("doc_id")]
        return list(dict.fromkeys(ordered))

    def _format_doc_title(self, doc: dict[str, Any]) -> str:
        title = doc.get("title") or "（无标题）"
        doc_code = doc.get("doc_code") or doc.get("doc_number") or ""
        return f"《{title}》{f'({doc_code})' if doc_code else ''}"

    # ── Sprint 4 card formatting helpers ──

    def _format_name_list(self, items: list[str] | None) -> str:
        """Format a list of plain names into a semicolon-separated string."""
        if not items:
            return "无"
        return "；".join(items)

    def _format_ref_names(self, items: list[dict[str, Any]] | None) -> str:
        """Format a list of entity ref dicts (with 'name' key) into a string."""
        if not items:
            return "无"
        names = [item.get("name") or "" for item in items]
        names = [n for n in names if n]
        return "；".join(names) if names else "无"

    def _format_budget_list(self, items: list[dict[str, Any]] | None) -> str:
        """Format budget items."""
        if not items:
            return "无"
        parts: list[str] = []
        for b in items:
            name = b.get("name") or ""
            amount = b.get("amount") or ""
            parts.append(f"{name}({amount})" if amount else name)
        return "；".join(parts) if parts else "无"

    def _format_card_docs(self, docs: list[dict[str, Any]] | None) -> str:
        """Format source_docs from card results."""
        if not docs:
            return "未提取到来源文件"
        parts: list[str] = []
        for doc in docs[:5]:
            title = doc.get("title") or "（无标题）"
            doc_code = doc.get("doc_code") or ""
            parts.append(f"《{title}》{f'({doc_code})' if doc_code else ''}")
        return "；".join(parts)

    def _format_related_docs(self, docs: list[dict[str, Any]] | None) -> str:
        """Format related_docs (2-hop heuristic docs, may be empty)."""
        if not docs:
            return "暂无相关文档"
        parts: list[str] = []
        for doc in docs[:5]:
            title = doc.get("title") or "（无标题）"
            doc_code = doc.get("doc_code") or ""
            parts.append(f"《{title}》{f'({doc_code})' if doc_code else ''}")
        return "；".join(parts)