"""Unit tests for ResearchEngine graph-evidence integration."""

from __future__ import annotations

from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock

import pytest

from app.core.graph_query_planner import GraphEvidence, QueryIntent, QueryPlan
from app.api.schemas.research import ResearchPlan, ResearchTask
from app.core.research_engine import ResearchEngine


def _make_doc(doc_id: str, title: str) -> dict:
    """Return a minimal document dictionary for the given id and title.

    ``doc_number`` is derived from ``doc_id``; every remaining field is a
    fixed placeholder value.
    """
    doc: dict = {"doc_id": doc_id, "title": title}
    doc["doc_number"] = f"{doc_id}-CODE"
    doc.update(
        issuing_org="测试机构",
        publish_date="2024-01-01",
        passages=["测试摘录"],
        score=1.0,
    )
    return doc


async def _collect_chunks(
    engine: ResearchEngine,
    question: str,
    *,
    seed_doc_ids: list[str] | None = None,
    mode: str = "research",
) -> tuple[list, list[dict[str, str]]]:
    """Drain ``engine._stream`` for ``question`` and capture the LLM prompt.

    ``engine._llm.chat`` is replaced with a stub that records the message
    list it receives and then streams two fixed tokens.

    Returns:
        A ``(chunks, messages)`` pair: every chunk yielded by the stream, and
        the messages handed to the stubbed chat call.

    Raises:
        KeyError: If the stream finished without ever calling the chat stub.
    """
    recorded: dict[str, list[dict[str, str]]] = {}

    async def _recording_chat(messages, **_kwargs):
        # Remember the prompt so the caller can assert on its content.
        recorded["messages"] = messages
        yield "结论"
        yield "。"

    engine._llm.chat = _recording_chat

    stream = engine._stream(
        question,
        SimpleNamespace(),
        session_id=None,
        seed_doc_ids=seed_doc_ids,
        mode=mode,
    )
    chunks = []
    async for chunk in stream:
        chunks.append(chunk)
    return chunks, recorded["messages"]


def _make_engine(
    planner: MagicMock,
    graph_service: MagicMock,
    *,
    session_store: MagicMock | None = None,
) -> ResearchEngine:
    """Build a ``ResearchEngine`` wired with mock collaborators.

    The LLM mock answers ``chat_json`` with a fixed keyword payload and the ES
    mock returns no service guides. When ``session_store`` is omitted the
    engine is given a mocked redis client instead. ``_es_search`` and
    ``_fetch_graph_docs`` on the engine are replaced with async mocks that
    each return one canned document.
    """
    llm = MagicMock()
    keyword_payload = {
        "keywords": ["建设用地审批"],
        "organizations": [],
        "regions": [],
    }
    llm.chat_json = AsyncMock(return_value=keyword_payload)

    es_client = MagicMock()
    es_client.search_service_guides = AsyncMock(return_value=[])

    # Arguments shared by both construction paths.
    engine_kwargs = {
        "es_client": es_client,
        "embedding_service": MagicMock(),
        "graph_service": graph_service,
        "llm_client": llm,
        "planner": planner,
    }
    if session_store is not None:
        engine_kwargs["session_store"] = session_store
    else:
        redis = MagicMock()
        redis.raw = MagicMock()
        redis.raw.get = AsyncMock(return_value=None)
        redis.raw.setex = AsyncMock()
        engine_kwargs["redis_client"] = redis

    engine = ResearchEngine(**engine_kwargs)
    engine._es_search = AsyncMock(return_value=[_make_doc("doc-es", "ES 文档")])
    engine._fetch_graph_docs = AsyncMock(return_value=[_make_doc("doc-graph", "图谱文档")])
    return engine


@pytest.mark.asyncio
async def test_research_engine_injects_graph_evidence() -> None:
    """Planner evidence for a matter-detail plan reaches the final prompt.

    Also checks that the graph-evidence "thinking" chunk is emitted and that
    the evidence doc ids are passed to ``_fetch_graph_docs`` exactly once.
    """
    matter_plan = QueryPlan(
        intent=QueryIntent.MATTER_DETAIL,
        matter_query="建设用地审批",
    )
    evidence = GraphEvidence(
        text="## 图谱结构化证据\n\n### 事项：建设用地审批\n- 所需材料：营业执照副本",
        doc_ids=["doc-graph"],
    )
    planner = MagicMock()
    planner.plan = MagicMock(return_value=matter_plan)
    planner.collect_evidence = AsyncMock(return_value=evidence)

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    chunks, messages = await _collect_chunks(engine, "建设用地审批需要什么材料")

    thinking_notes = [chunk.content for chunk in chunks if chunk.type == "thinking"]
    assert "正在查询图谱结构化证据…" in thinking_notes
    assert "图谱结构化证据" in messages[-1]["content"]

    fetch_graph_docs = engine._fetch_graph_docs
    assert fetch_graph_docs.await_count == 1
    requested_doc_ids = fetch_graph_docs.await_args.args[0]
    assert requested_doc_ids == ["doc-graph"]


@pytest.mark.asyncio
async def test_research_engine_general_path_skips_planner_collection() -> None:
    """A GENERAL-intent plan must not trigger planner evidence collection."""
    general_plan = QueryPlan(intent=QueryIntent.GENERAL)
    planner = MagicMock()
    planner.plan = MagicMock(return_value=general_plan)
    planner.collect_evidence = AsyncMock()

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    chunks, messages = await _collect_chunks(engine, "清远市2024年经济发展情况")

    thinking_notes = [chunk.content for chunk in chunks if chunk.type == "thinking"]
    assert "正在查询图谱结构化证据…" not in thinking_notes
    planner.collect_evidence.assert_not_awaited()
    final_prompt = messages[-1]["content"]
    assert "图谱结构化证据" not in final_prompt


@pytest.mark.asyncio
async def test_research_engine_degrades_when_planner_collection_fails() -> None:
    """A planner ``collect_evidence`` failure must not abort the stream.

    The stream is expected to end with a ``done`` chunk, emit no ``error``
    chunk, and leave graph evidence out of the final prompt.
    """
    policy_plan = QueryPlan(
        intent=QueryIntent.POLICY_CHAIN,
        doc_code="清政办〔2024〕3号",
    )
    planner = MagicMock()
    planner.plan = MagicMock(return_value=policy_plan)
    planner.collect_evidence = AsyncMock(side_effect=RuntimeError("boom"))

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    chunks, messages = await _collect_chunks(engine, "清政办〔2024〕3号的上位法依据是什么")

    chunk_types = [chunk.type for chunk in chunks]
    assert "done" in chunk_types
    assert "error" not in chunk_types
    assert "图谱结构化证据" not in messages[-1]["content"]


@pytest.mark.asyncio
async def test_research_engine_degrades_when_graph_doc_discovery_fails() -> None:
    """A failing ``search_graph_for_docs`` call must not abort the stream."""
    general_plan = QueryPlan(intent=QueryIntent.GENERAL)
    planner = MagicMock()
    planner.plan = MagicMock(return_value=general_plan)
    planner.collect_evidence = AsyncMock()

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(side_effect=RuntimeError("boom"))

    engine = _make_engine(planner, graph)
    # Only the emitted chunks matter here; the captured prompt is ignored.
    chunks, _ = await _collect_chunks(engine, "营商环境优化相关政策")

    chunk_types = [chunk.type for chunk in chunks]
    assert "done" in chunk_types
    assert "error" not in chunk_types


@pytest.mark.asyncio
async def test_research_engine_includes_seed_docs_in_context() -> None:
    """Seed documents are fetched, referenced and included in the prompt."""
    general_plan = QueryPlan(intent=QueryIntent.GENERAL)
    planner = MagicMock()
    planner.plan = MagicMock(return_value=general_plan)
    planner.collect_evidence = AsyncMock()

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    seed_doc = _make_doc("doc-seed", "导入文档")
    engine._fetch_graph_docs = AsyncMock(return_value=[seed_doc])

    chunks, messages = await _collect_chunks(
        engine,
        "请结合导入资料分析政策执行要求",
        seed_doc_ids=["doc-seed"],
    )

    thinking_notes = [chunk.content for chunk in chunks if chunk.type == "thinking"]
    assert "正在载入导入资料…" in thinking_notes

    referenced_doc_ids = [chunk.doc_id for chunk in chunks if chunk.type == "reference"]
    assert "doc-seed" in referenced_doc_ids

    assert "导入文档" in messages[-1]["content"]
    requested_doc_ids = engine._fetch_graph_docs.await_args.args[0]
    assert requested_doc_ids == ["doc-seed"]


@pytest.mark.asyncio
async def test_research_engine_supports_qa_mode() -> None:
    """``mode="qa"`` yields the QA thinking chunk and the QA system prompt."""
    general_plan = QueryPlan(intent=QueryIntent.GENERAL)
    planner = MagicMock()
    planner.plan = MagicMock(return_value=general_plan)
    planner.collect_evidence = AsyncMock()

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    chunks, messages = await _collect_chunks(
        engine,
        "这份文件的核心要求是什么",
        mode="qa",
    )

    thinking_notes = [chunk.content for chunk in chunks if chunk.type == "thinking"]
    assert "正在分析问答问题…" in thinking_notes

    first_message = messages[0]
    assert "智能问答助手" in first_message["content"]
    assert "done" in [chunk.type for chunk in chunks]


@pytest.mark.asyncio
async def test_research_engine_uses_service_guide_evidence_without_chunk_hits() -> None:
    """A service-guide hit alone yields a reference and guide evidence.

    ES search and the graph document fetch both return nothing here; the
    only hit comes from ``search_service_guides``.
    """
    guide_hit = {
        "_score": 8.5,
        "_source": {
            "doc_id": "guide-doc-1",
            "matter_name": "建设用地审批",
            "implementation_code": "GUIDE-001",
            "linked_matter_ids": ["matter_1"],
            "material_names": ["申请表", "营业执照副本"],
            "fee_names": ["工本费"],
            "window_names": ["政务服务中心综合窗口"],
            "consultation_and_supervision": {"consultation_phones": ["12345"]},
            "process_info": {"step_titles": ["受理", "审核", "办结"]},
            "document_info": {"title": "建设用地审批办事指南"},
        },
    }
    matter_plan = QueryPlan(
        intent=QueryIntent.MATTER_DETAIL,
        matter_query="建设用地审批",
    )

    planner = MagicMock()
    planner.plan = MagicMock(return_value=matter_plan)
    planner.collect_evidence = AsyncMock(return_value=GraphEvidence())

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    engine._es_search = AsyncMock(return_value=[])
    engine._fetch_graph_docs = AsyncMock(return_value=[])
    engine._es.search_service_guides = AsyncMock(return_value=[guide_hit])

    chunks, messages = await _collect_chunks(engine, "建设用地审批收费是多少")

    referenced_doc_ids = [chunk.doc_id for chunk in chunks if chunk.type == "reference"]
    assert "guide-doc-1" in referenced_doc_ids
    assert "办事指南结构化证据" in messages[-1]["content"]
    assert "done" in [chunk.type for chunk in chunks]


@pytest.mark.asyncio
async def test_research_engine_passes_acl_tokens_to_graph_calls() -> None:
    """The user's ``acl_tokens`` are forwarded as a keyword to graph calls.

    Both ``planner.collect_evidence`` and ``graph.search_graph_for_docs``
    must receive ``acl_tokens=["D_01"]``.
    """
    matter_plan = QueryPlan(
        intent=QueryIntent.MATTER_DETAIL,
        matter_query="建设用地审批",
    )
    planner = MagicMock()
    planner.plan = MagicMock(return_value=matter_plan)
    planner.collect_evidence = AsyncMock(return_value=GraphEvidence())

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)

    async def fake_chat(messages, **_kwargs):
        # The prompt itself is irrelevant to this test.
        del messages
        yield "结论"

    engine._llm.chat = fake_chat

    user = SimpleNamespace(user_id="user-1", acl_tokens=["D_01"])
    chunks = []
    async for chunk in engine._stream(
        "建设用地审批需要什么材料",
        user,
        session_id=None,
    ):
        chunks.append(chunk)

    assert "done" in [chunk.type for chunk in chunks]
    planner_kwargs = planner.collect_evidence.await_args.kwargs
    assert planner_kwargs["acl_tokens"] == ["D_01"]
    graph_kwargs = graph.search_graph_for_docs.await_args.kwargs
    assert graph_kwargs["acl_tokens"] == ["D_01"]


@pytest.mark.asyncio
async def test_research_engine_builds_structured_plan() -> None:
    """``build_plan`` turns the LLM JSON into a plan carrying the seed docs.

    ``chat_json`` is mocked to answer twice: first with a keyword payload,
    then with the plan payload.
    """
    keyword_payload = {
        "keywords": ["数字政府"],
        "organizations": ["政务服务中心"],
        "regions": [],
    }
    plan_payload = {
        "summary": "围绕数字政府建设梳理目标、任务与执行要点。",
        "objectives": ["明确目标", "归纳措施"],
        "sub_questions": ["目标是什么", "执行抓手是什么"],
        "retrieval_focus": ["数字政府", "执行要求"],
        "section_outline": ["研究结论", "政策依据"],
        "expected_deliverables": ["执行摘要"],
        "notes": ["需注意材料时效性"],
    }

    planner = MagicMock()
    planner.plan = MagicMock(return_value=QueryPlan(intent=QueryIntent.GENERAL))
    planner.collect_evidence = AsyncMock()

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    llm = MagicMock()
    llm.chat_json = AsyncMock(side_effect=[keyword_payload, plan_payload])

    redis = MagicMock()
    redis.raw = MagicMock()
    redis.raw.get = AsyncMock(return_value=None)
    redis.raw.setex = AsyncMock()

    engine = ResearchEngine(
        es_client=MagicMock(),
        embedding_service=MagicMock(),
        graph_service=graph,
        llm_client=llm,
        redis_client=redis,
        planner=planner,
    )

    task = ResearchTask(
        topic="数字政府建设",
        question="数字政府建设的主要目标是什么？",
        goal="梳理目标与主要措施",
        output_template="policy_brief",
        depth_level="deep",
    )

    built_plan = await engine.build_plan(task, seed_doc_ids=["doc-001"])

    assert built_plan.summary
    assert built_plan.section_outline == ["研究结论", "政策依据"]
    assert built_plan.included_doc_ids == ["doc-001"]


@pytest.mark.asyncio
async def test_research_engine_run_deep_research_emits_structured_events() -> None:
    """``run_deep_research`` emits one chunk type per part of the LLM report.

    The mocked ``chat_json`` report contains a summary, a finding, a
    conflict, an open question, a section and next steps; each must show up
    as a chunk, along with ``plan`` and ``done``.
    """
    report_payload = {
        "executive_summary": "总体上以提升治理能力和服务效能为目标。[1]",
        "findings": [
            {
                "title": "目标导向明确",
                "content": "文件强调提升整体协同能力。[1]",
                "strength": "high",
                "source_indices": [1],
            }
        ],
        "conflicts": [
            {
                "title": "执行口径仍待统一",
                "content": "部分材料缺少统一执行边界。[1]",
                "severity": "medium",
                "source_indices": [1],
            }
        ],
        "open_questions": [
            {
                "question": "地方实施细则是否已全部出台？",
                "reason": "当前材料中没有完整覆盖。",
            }
        ],
        "sections": [
            {
                "title": "政策依据",
                "summary": "归纳主要依据",
                "content": "根据 [1]，当前以协同治理为主线。",
                "source_indices": [1],
            }
        ],
        "one_page_summary": "一页式摘要内容",
        "recommended_next_steps": ["补充地方配套文件", "核查最新执行通告"],
    }

    planner = MagicMock()
    planner.plan = MagicMock(return_value=QueryPlan(intent=QueryIntent.GENERAL))
    planner.collect_evidence = AsyncMock()

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    engine._extract_keywords = AsyncMock(return_value=["数字政府", "执行要求"])
    engine._llm.chat_json = AsyncMock(return_value=report_payload)

    task = ResearchTask(
        topic="数字政府建设",
        question="数字政府建设的主要目标是什么？",
        goal="梳理目标与执行要求",
        output_template="comprehensive",
        depth_level="standard",
    )
    plan = ResearchPlan(
        summary="围绕数字政府建设梳理目标、政策依据和执行要求。",
        objectives=["明确目标"],
        sub_questions=["主要目标是什么？"],
        retrieval_focus=["数字政府", "执行要求"],
        section_outline=["研究结论", "政策依据"],
        expected_deliverables=["执行摘要"],
        notes=[],
        included_doc_ids=[],
        included_matter_ids=[],
    )

    chunks = []
    async for chunk in engine.run_deep_research(
        task,
        plan,
        SimpleNamespace(),
        session_id=None,
    ):
        chunks.append(chunk)

    def titles_of(kind: str) -> list:
        """Return the titles of every emitted chunk of the given type."""
        return [chunk.title for chunk in chunks if chunk.type == kind]

    chunk_types = [chunk.type for chunk in chunks]
    assert "plan" in chunk_types
    assert "summary" in chunk_types
    assert "目标导向明确" in titles_of("finding")
    assert "执行口径仍待统一" in titles_of("conflict")
    assert "地方实施细则是否已全部出台？" in titles_of("open_question")
    assert "政策依据" in titles_of("section")
    assert "follow_up" in chunk_types
    assert "done" in chunk_types


@pytest.mark.asyncio
async def test_research_engine_run_deep_research_includes_service_guide_evidence() -> None:
    """Deep research surfaces service-guide hits as references and evidence.

    ES search and the graph document fetch return nothing, so the only hit
    comes from ``search_service_guides``. The test checks the ``guide``
    source group, the guide reference payload, and that the guide evidence
    heading appears in the last message sent to ``chat_json``.
    """
    planner = MagicMock()
    planner.plan = MagicMock(
        return_value=QueryPlan(
            intent=QueryIntent.MATTER_DETAIL,
            matter_query="建设用地审批",
        )
    )
    planner.collect_evidence = AsyncMock(return_value=GraphEvidence())

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    engine._extract_keywords = AsyncMock(return_value=["建设用地审批", "收费"])
    engine._es_search = AsyncMock(return_value=[])
    engine._fetch_graph_docs = AsyncMock(return_value=[])
    engine._es.search_service_guides = AsyncMock(
        return_value=[
            {
                "_id": "guide-2",
                "_score": 9.0,
                "_source": {
                    "profile_id": "guide-2",
                    "doc_id": "guide-doc-2",
                    "matter_name": "建设用地审批",
                    "implementation_code": "GUIDE-002",
                    "linked_matter_ids": ["matter_2"],
                    "material_names": ["申请表"],
                    "fee_names": ["工本费"],
                    "process_info": {"summary": "受理、审核、办结。"},
                    "consultation_and_supervision": {"consultation_phones": ["12345"]},
                    "document_info": {"title": "建设用地审批办事指南"},
                },
            }
        ]
    )
    engine._llm.chat_json = AsyncMock(
        return_value={
            "executive_summary": "已结合办事指南结构化证据形成结论。",
            "findings": [],
            "conflicts": [],
            "open_questions": [],
            "sections": [],
            "one_page_summary": "",
            "recommended_next_steps": [],
        }
    )

    task = ResearchTask(
        topic="建设用地审批",
        question="建设用地审批收费是多少？",
        goal="确认收费与材料要求",
        output_template="comprehensive",
        depth_level="standard",
    )
    plan = ResearchPlan(
        summary="核对建设用地审批的材料、收费和办理路径。",
        objectives=["确认收费"],
        sub_questions=["收费是多少？"],
        retrieval_focus=["收费", "材料"],
        section_outline=["研究结论"],
        expected_deliverables=["执行摘要"],
        notes=[],
        included_doc_ids=[],
        included_matter_ids=[],
    )

    chunks = [
        chunk
        async for chunk in engine.run_deep_research(
            task,
            plan,
            SimpleNamespace(),
            session_id=None,
        )
    ]

    assert any(
        chunk.type == "source_group" and chunk.payload and chunk.payload.get("group") == "guide"
        for chunk in chunks
    )
    # Use a default so a missing chunk fails as a plain assertion; a bare
    # next() inside a coroutine would surface as
    # "RuntimeError: coroutine raised StopIteration".
    guide_reference = next(
        (chunk for chunk in chunks if chunk.type == "reference" and chunk.doc_id == "guide-doc-2"),
        None,
    )
    assert guide_reference is not None, "no reference chunk emitted for guide-doc-2"
    assert guide_reference.payload and guide_reference.payload.get("profile_id") == "guide-2"
    assert guide_reference.payload and guide_reference.payload.get("matched_fields") == ["materials", "fees"]
    assert guide_reference.payload and guide_reference.payload.get("matched_field_labels") == ["申请材料", "收费项目"]
    assert guide_reference.payload and guide_reference.payload.get("summary") == "命中申请材料、收费项目"
    llm_messages = engine._llm.chat_json.await_args.args[0]
    assert "办事指南结构化证据" in llm_messages[-1]["content"]
    assert any(chunk.type == "done" for chunk in chunks)


@pytest.mark.asyncio
async def test_research_engine_rerun_section_emits_section_update() -> None:
    """``rerun_section`` emits a ``section`` chunk flagged as a rerun.

    The section payload must carry ``rerun=True`` and the source doc ids,
    and the stream must also contain ``follow_up`` and ``done`` chunks.
    """
    planner = MagicMock()
    planner.plan = MagicMock(return_value=QueryPlan(intent=QueryIntent.GENERAL))
    planner.collect_evidence = AsyncMock()

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    engine._extract_keywords = AsyncMock(return_value=["数字政府", "政策依据"])
    engine._fetch_graph_docs = AsyncMock(return_value=[_make_doc("doc-section", "章节文档")])
    engine._llm.chat_json = AsyncMock(
        return_value={
            "section": {
                "title": "政策依据",
                "summary": "补充章节摘要",
                "content": "根据 [1]，该章节应围绕政策依据展开。",
                "source_indices": [1],
            },
            "notes": ["补充了章节重点证据"],
        }
    )

    task = ResearchTask(
        topic="数字政府建设",
        question="数字政府建设的主要目标是什么？",
        goal="梳理目标与执行要求",
        output_template="comprehensive",
        depth_level="standard",
    )
    plan = ResearchPlan(
        summary="围绕数字政府建设梳理目标、政策依据和执行要求。",
        objectives=["明确目标"],
        sub_questions=["主要目标是什么？"],
        retrieval_focus=["数字政府", "执行要求"],
        section_outline=["研究结论", "政策依据"],
        expected_deliverables=["执行摘要"],
        notes=[],
        included_doc_ids=[],
        included_matter_ids=[],
    )

    chunks = [
        chunk
        async for chunk in engine.rerun_section(
            task,
            plan,
            "政策依据",
            SimpleNamespace(),
            section_summary="归纳政策依据与目标导向。",
            source_doc_ids=["doc-section"],
            session_id=None,
        )
    ]

    # Use a default so a missing chunk fails as a plain assertion; a bare
    # next() inside a coroutine would surface as
    # "RuntimeError: coroutine raised StopIteration".
    section_chunk = next((chunk for chunk in chunks if chunk.type == "section"), None)
    assert section_chunk is not None, "no section chunk emitted by rerun_section"
    assert section_chunk.title == "政策依据"
    assert section_chunk.payload and section_chunk.payload.get("rerun") is True
    assert section_chunk.payload and section_chunk.payload.get("source_doc_ids") == ["doc-section"]
    assert any(chunk.type == "follow_up" for chunk in chunks)
    assert any(chunk.type == "done" for chunk in chunks)


@pytest.mark.asyncio
async def test_research_engine_keeps_guide_reference_alongside_source_doc_and_persists_payload() -> None:
    """A guide hit and a search hit sharing one ``doc_id`` are both kept.

    Two reference chunks for ``shared-doc`` are expected, one per source
    group (``search`` and ``guide``), and both must appear in the references
    handed to ``session_store.save_session``.
    """
    planner = MagicMock()
    planner.plan = MagicMock(
        return_value=QueryPlan(
            intent=QueryIntent.MATTER_DETAIL,
            matter_query="建设用地审批",
        )
    )
    planner.collect_evidence = AsyncMock(return_value=GraphEvidence())

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    session_store = MagicMock()
    session_store.load_session = AsyncMock(return_value=[])
    session_store.save_session = AsyncMock()

    engine = _make_engine(planner, graph, session_store=session_store)
    engine._es_search = AsyncMock(return_value=[_make_doc("shared-doc", "共享原文")])
    engine._fetch_graph_docs = AsyncMock(return_value=[])
    engine._es.search_service_guides = AsyncMock(
        return_value=[
            {
                "_id": "guide-shared-1",
                "_score": 9.3,
                "_source": {
                    "profile_id": "guide-shared-1",
                    "doc_id": "shared-doc",
                    "matter_name": "建设用地审批",
                    "implementation_code": "GUIDE-SHARED-001",
                    "linked_matter_ids": ["matter-shared-1"],
                    "material_names": ["申请表"],
                    "document_info": {"title": "建设用地审批办事指南"},
                },
            }
        ]
    )

    async def fake_chat(messages, **_kwargs):
        # The prompt itself is irrelevant to this test.
        del messages
        yield "结论"

    engine._llm.chat = fake_chat

    chunks = [
        chunk
        async for chunk in engine._stream(
            "建设用地审批怎么办理",
            SimpleNamespace(user_id="user-1"),
            session_id="session-1",
        )
    ]

    reference_chunks = [chunk for chunk in chunks if chunk.type == "reference" and chunk.doc_id == "shared-doc"]
    assert len(reference_chunks) == 2
    assert {chunk.payload.get("source_group") for chunk in reference_chunks if chunk.payload} == {"search", "guide"}

    # Fail clearly if the session was never saved, instead of raising an
    # AttributeError on a None ``await_args``.
    session_store.save_session.assert_awaited()
    persisted_refs = session_store.save_session.await_args.args[4]
    assert len(persisted_refs) == 2
    # Use a default so a missing entry fails as a plain assertion; a bare
    # next() inside a coroutine would surface as
    # "RuntimeError: coroutine raised StopIteration".
    guide_ref = next(
        (ref for ref in persisted_refs if ref.get("source_group") == "guide"),
        None,
    )
    assert guide_ref is not None, "no persisted reference with source_group 'guide'"
    assert guide_ref["profile_id"] == "guide-shared-1"
    assert guide_ref["doc_id"] == "shared-doc"


@pytest.mark.asyncio
async def test_collect_service_guide_evidence_fallback_drops_explicit_doc_ids() -> None:
    """An empty first guide search is retried without the ``doc_ids`` filter.

    ``search_service_guides`` is mocked to return nothing on the first call
    and one hit on the second. The first call must carry the explicit
    ``doc_ids`` and ``matter_ids``; the second must pass ``matter_name`` and
    omit ``doc_ids``.
    """
    fallback_hit = {
        "_score": 7.8,
        "_source": {
            "profile_id": "guide-fallback-1",
            "doc_id": "guide-doc-fallback-1",
            "matter_name": "建设用地审批",
            "implementation_code": "GUIDE-FALLBACK-001",
            "document_info": {"title": "建设用地审批办事指南"},
        },
    }

    planner = MagicMock()
    planner.plan = MagicMock(
        return_value=QueryPlan(
            intent=QueryIntent.MATTER_DETAIL,
            matter_query="建设用地审批",
        )
    )
    planner.collect_evidence = AsyncMock(return_value=GraphEvidence())

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    guide_search = AsyncMock(side_effect=[[], [fallback_hit]])
    engine._es.search_service_guides = guide_search

    evidence_text, guide_docs = await engine._collect_service_guide_evidence(
        "建设用地审批收费是多少",
        SimpleNamespace(acl_tokens=["D_01"]),
        plan=QueryPlan(intent=QueryIntent.MATTER_DETAIL, matter_query="建设用地审批"),
        doc_ids=["source-doc-1"],
        matter_ids=["matter-1"],
    )

    assert guide_docs
    assert "办事指南结构化证据" in evidence_text
    assert engine._es.search_service_guides.await_count == 2

    recorded_calls = engine._es.search_service_guides.await_args_list
    first_kwargs = recorded_calls[0].kwargs
    second_kwargs = recorded_calls[1].kwargs
    assert first_kwargs["doc_ids"] == ["source-doc-1"]
    assert first_kwargs["matter_ids"] == ["matter-1"]
    assert second_kwargs["matter_name"] == "建设用地审批"
    assert "doc_ids" not in second_kwargs


@pytest.mark.asyncio
async def test_qa_search_preserves_source_url_fields() -> None:
    """``qa_search`` returns source-provenance fields from the ES hit unchanged."""
    provenance = {
        "source_url": "https://example.com/policy",
        "source_metadata": {"source_url": "https://example.com/from-meta"},
        "source_system": "web",
        "source_site_code": "site-a",
        "source_target_code": "target-a",
    }
    web_hit = {**_make_doc("doc-web", "Web Policy"), **provenance}

    planner = MagicMock()
    planner.plan = MagicMock(return_value=QueryPlan(intent=QueryIntent.GENERAL))
    planner.collect_evidence = AsyncMock()

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=[])

    engine = _make_engine(planner, graph)
    engine._extract_keywords = AsyncMock(return_value=["policy"])
    engine._es_search = AsyncMock(return_value=[web_hit])
    engine._fetch_graph_docs = AsyncMock(return_value=[])

    result = await engine.qa_search("policy", SimpleNamespace(acl_tokens=[]))

    first_doc = result["documents"][0]
    assert first_doc["doc_id"] == "doc-web"
    assert first_doc["source_url"] == "https://example.com/policy"
    assert first_doc["source_metadata"] == {"source_url": "https://example.com/from-meta"}
    assert first_doc["source_system"] == "web"
    assert first_doc["source_site_code"] == "site-a"
    assert first_doc["source_target_code"] == "target-a"


@pytest.mark.asyncio
async def test_qa_search_llm_filters_final_merged_docs() -> None:
    """With ``llm=True`` only the top document survives in the result.

    The low-scoring ES hit and the low-scoring guide hit must both be absent:
    ``documents`` holds only ``doc-top``, ``guide_evidence_text`` is empty,
    and ``_fetch_graph_docs`` is never awaited.
    """
    top_hit = {**_make_doc("doc-top", "Top Policy"), "score": 2.0}
    low_hit = {**_make_doc("doc-low", "Low Policy"), "score": 0.4}
    low_guide_hit = {
        "_score": 0.4,
        "_source": {
            "profile_id": "guide-low",
            "doc_id": "guide-low",
            "matter_name": "Low Guide",
            "document_info": {"title": "Low Guide"},
        },
    }

    planner = MagicMock()
    planner.plan = MagicMock(
        return_value=QueryPlan(
            intent=QueryIntent.MATTER_DETAIL,
            matter_query="policy",
        )
    )
    planner.collect_evidence = AsyncMock(return_value=GraphEvidence())

    graph = MagicMock()
    graph.search_graph_for_docs = AsyncMock(return_value=["doc-low"])

    engine = _make_engine(planner, graph)
    engine._extract_keywords = AsyncMock(return_value=["policy"])
    engine._es_search = AsyncMock(return_value=[top_hit, low_hit])
    engine._fetch_graph_docs = AsyncMock(return_value=[])
    engine._es.search_service_guides = AsyncMock(return_value=[low_guide_hit])

    result = await engine.qa_search("policy", SimpleNamespace(acl_tokens=[]), llm=True)

    returned_doc_ids = [doc["doc_id"] for doc in result["documents"]]
    assert returned_doc_ids == ["doc-top"]
    assert result["guide_evidence_text"] == ""
    assert engine._fetch_graph_docs.await_count == 0
