"""Focused tests for research_retriever embedding failure → BM25 fallback.

验证 embedding 短暂故障时 research 路径应降级到纯 BM25 搜索，
而非直接返回空结果列表。
"""

from __future__ import annotations

import asyncio
from unittest.mock import AsyncMock, MagicMock, PropertyMock

import pytest

from app.core.research_retriever import ResearchRetriever
from app.core.permission import PermissionContext


def _make_retriever(*, hybrid_available: bool = True) -> ResearchRetriever:
    """Build a ResearchRetriever whose collaborators are all mocks.

    Args:
        hybrid_available: Value reported by the mocked ES client's
            ``should_use_hybrid`` property.

    Returns:
        A retriever wired to ``MagicMock`` dependencies. The ES mock's
        ``hybrid_search`` is an ``AsyncMock`` resolving to ``(None, False)``.
    """
    es_mock = MagicMock()
    es_mock.raw = MagicMock()
    es_mock.hybrid_search = AsyncMock(return_value=(None, False))
    # should_use_hybrid is a @property, so the PropertyMock has to be
    # installed on the mock's class rather than on the instance.
    type(es_mock).should_use_hybrid = PropertyMock(return_value=hybrid_available)

    return ResearchRetriever(
        es_client=es_mock,
        embedding_service=MagicMock(),
        graph_service=MagicMock(),
        llm_client=MagicMock(),
        planner=MagicMock(),
    )


def _mock_perm() -> PermissionContext:
    """Return a PermissionContext-spec'd mock whose ES filter is ``match_all``."""
    permission = MagicMock(spec=PermissionContext)
    # Dotted key configures the child mock's return value in one call.
    permission.configure_mock(
        **{"build_es_filter.return_value": {"match_all": {}}}
    )
    return permission


class TestEmbeddingFailureFallback:
    """When embed_single raises, es_search should fall back to BM25.

    The fallback must still return search results instead of an empty list.
    """

    @staticmethod
    def _run(coro):
        """Run *coro* to completion on a private event loop and return its result.

        A fresh loop is created and closed per call so the tests do not depend
        on (or modify) the implicit global event loop, which
        ``asyncio.get_event_loop()`` no longer reliably provides when no loop
        is running.
        """
        loop = asyncio.new_event_loop()
        try:
            return loop.run_until_complete(coro)
        finally:
            loop.close()

    def test_bm25_search_executed_on_embedding_failure(self):
        """The ES BM25 search should still run when embedding fails."""
        retriever = _make_retriever(hybrid_available=True)
        # embed_single raises RuntimeError
        retriever._embedding.embed_single = AsyncMock(
            side_effect=RuntimeError("embedding service down")
        )
        # The BM25 search returns one valid hit
        retriever._es.raw.search = AsyncMock(
            return_value={
                "hits": {
                    "total": {"value": 1},
                    "hits": [
                        {
                            "_id": "chunk-1",
                            "_score": 1.5,
                            "_source": {
                                "doc_id": "doc-1",
                                "title": "Test",
                                "content": "Test content",
                                "chunk_index": 0,
                            },
                            "highlight": {},
                        }
                    ],
                }
            }
        )

        result = self._run(
            retriever.es_search(
                question="test query",
                perm=_mock_perm(),
            )
        )

        # The BM25 search must have been issued (rather than returning an empty list)
        retriever._es.raw.search.assert_called_once()
        assert len(result) > 0, "Should return BM25 results, not empty list"

    def test_hybrid_not_called_on_embedding_failure(self):
        """hybrid_search should not be called when embedding fails."""
        retriever = _make_retriever(hybrid_available=True)
        retriever._embedding.embed_single = AsyncMock(
            side_effect=RuntimeError("embedding service down")
        )
        retriever._es.raw.search = AsyncMock(
            return_value={"hits": {"total": {"value": 0}, "hits": []}}
        )

        self._run(
            retriever.es_search(
                question="test query",
                perm=_mock_perm(),
            )
        )

        # hybrid_search must not be invoked
        retriever._es.hybrid_search.assert_not_called()

    def test_normal_hybrid_path_still_works(self):
        """The hybrid path should keep working when embedding succeeds."""
        retriever = _make_retriever(hybrid_available=True)
        retriever._embedding.embed_single = AsyncMock(return_value=[0.1] * 768)
        retriever._es.hybrid_search = AsyncMock(
            return_value=(
                {
                    "hits": {
                        "total": {"value": 1},
                        "hits": [
                            {
                                "_id": "chunk-1",
                                "_score": 2.0,
                                "_source": {
                                    "doc_id": "doc-1",
                                    "title": "Test",
                                    "content": "Hybrid result",
                                    "chunk_index": 0,
                                },
                                "highlight": {},
                            }
                        ],
                    }
                },
                True,
            )
        )

        result = self._run(
            retriever.es_search(
                question="test query",
                perm=_mock_perm(),
            )
        )

        retriever._es.hybrid_search.assert_called_once()
        assert len(result) > 0


def test_enrich_from_meta_backfills_source_url_even_when_title_exists():
    """_enrich_from_meta should backfill source fields even if a title is present.

    The doc passed in already carries a title; the test checks that the
    title is left untouched while ``source_url``, ``source_metadata`` and the
    ``source_*`` codes are copied from the mocked mget response, and that the
    mget request asked for ``source_url`` and ``source_metadata``.
    """
    retriever = _make_retriever(hybrid_available=False)
    retriever._es.raw.mget = AsyncMock(
        return_value={
            "docs": [
                {
                    "_id": "doc-web",
                    "found": True,
                    "_source": {
                        "title": "Existing title from meta",
                        "source_url": "https://example.com/policy",
                        "source_metadata": {"source_url": "https://example.com/from-meta"},
                        "source_system": "web",
                        "source_site_code": "site-a",
                        "source_target_code": "target-a",
                    },
                }
            ]
        }
    )
    docs = [{"doc_id": "doc-web", "title": "Existing title"}]

    # Use a private event loop instead of asyncio.get_event_loop(), which is
    # not guaranteed to return a usable loop when none is running.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(retriever._enrich_from_meta(docs))
    finally:
        loop.close()

    retriever._es.raw.mget.assert_called_once()
    requested_source = retriever._es.raw.mget.call_args.kwargs["_source"]
    assert "source_url" in requested_source
    assert "source_metadata" in requested_source
    # The pre-existing title wins over the one stored in the meta document.
    assert docs[0]["title"] == "Existing title"
    assert docs[0]["source_url"] == "https://example.com/policy"
    assert docs[0]["source_metadata"] == {"source_url": "https://example.com/from-meta"}
    assert docs[0]["source_system"] == "web"
    assert docs[0]["source_site_code"] == "site-a"
    assert docs[0]["source_target_code"] == "target-a"
