from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock

import pytest

import app.core.search_engine as search_engine_module
from app.core.permission import PermissionContext
from app.core.search_engine import SearchEngine, _filter_llm_relevance


def _make_engine() -> SearchEngine:
    """Build a SearchEngine wired to mocked ES and embedding dependencies."""
    es = MagicMock()
    embeddings = MagicMock()
    return SearchEngine(es_client=es, embedding_service=embeddings)


def _make_perm() -> PermissionContext:
    """Permission context for a single test user holding two ACL tokens."""
    tokens = ["U_user-1", "A_01"]
    return PermissionContext(user_id="user-1", acl_tokens=tokens)


def test_build_query_supports_category_filter_and_title_scope():
    """Title-scoped search matches only the title field and keeps category filters."""
    engine = _make_engine()
    filters = {
        "knowledge_category": ["政策文件"],
        "search_scope": "title",
    }

    query_body = engine._build_query(
        query_text="机构改革",
        query_vector=None,
        perm=_make_perm(),
        filters=filters,
        page=1,
        page_size=20,
    )

    bool_query = query_body["query"]["bool"]
    expected_must = {"match": {"title": {"query": "机构改革"}}}
    assert bool_query["must"][0] == expected_must
    category_clause = {"terms": {"knowledge_category": ["政策文件"]}}
    assert category_clause in bool_query["filter"][0]["bool"]["must"]


def test_build_query_supports_doc_number_scope_and_partial_match():
    """Doc-number scope stays a wildcard keyword search even when a vector exists."""
    engine = _make_engine()

    query_body = engine._build_query(
        query_text="国办发〔2024〕",
        query_vector=[0.1, 0.2],
        perm=_make_perm(),
        filters={
            "doc_number": "〔2024〕",
            "search_scope": "doc_number",
        },
        page=1,
        page_size=20,
    )

    bool_query = query_body["query"]["bool"]
    # Supplying a query_vector must not flip this scope into hybrid mode.
    assert "hybrid" not in query_body.get("query", {})
    expected_must = {"wildcard": {"doc_number": {"value": "*国办发〔2024〕*"}}}
    assert bool_query["must"][0] == expected_must
    partial_clause = {"wildcard": {"doc_number": {"value": "*〔2024〕*"}}}
    assert partial_clause in bool_query["filter"][0]["bool"]["must"]


def test_build_query_supports_scene_signer_and_publish_year_filters():
    """Scene type, signer, and publish-year filters all land in the filter clause."""
    engine = _make_engine()

    query_body = engine._build_query(
        query_text="办事指南",
        query_vector=None,
        perm=_make_perm(),
        filters={
            "document_scene_type": ["standard_service_guide"],
            "signer": "张三",
            "publish_year": 2024,
        },
        page=1,
        page_size=20,
    )

    applied = query_body["query"]["bool"]["filter"][0]["bool"]["must"]
    expected_clauses = [
        {"terms": {"document_scene_type": ["standard_service_guide"]}},
        {"wildcard": {"signer": {"value": "*张三*"}}},
        # publish_year=2024 becomes a half-open date range over the whole year.
        {"range": {"publish_date": {"gte": "2024-01-01", "lt": "2025-01-01"}}},
    ]
    for clause in expected_clauses:
        assert clause in applied


def test_build_query_bm25_uses_page_offset():
    """BM25 pagination: page 2 with size 10 skips the first 10 hits."""
    engine = _make_engine()

    query_body = engine._build_query(
        query_text="产教融合",
        query_vector=None,
        perm=_make_perm(),
        filters={},
        page=2,
        page_size=10,
    )

    assert query_body["from"] == 10
    assert query_body["size"] == 10
    # Deduplication by content hash must remain active.
    assert "collapse" in query_body


def test_filter_llm_relevance_filters_final_documents_only():
    """Documents below the fixed or relative score cutoffs are dropped."""
    scored = [
        ("top", 2.0),
        ("kept", 1.0),
        ("relative_drop", 0.99),
        ("fixed_drop", 0.49),
    ]
    documents = [{"doc_id": doc_id, "score": score} for doc_id, score in scored]

    filtered = _filter_llm_relevance(documents)

    kept_ids = [item["doc_id"] for item in filtered]
    assert kept_ids == ["top", "kept"]


def test_filter_llm_relevance_preserves_seed_documents():
    """Seed-sourced documents survive filtering even without a score."""
    documents = [
        {"doc_id": "top", "score": 2.0, "_source_type": "search"},
        {"doc_id": "seed", "score": None, "_source_type": "seed"},
        {"doc_id": "drop", "score": 0.4, "_source_type": "search"},
    ]
    protected = {"seed"}

    filtered = _filter_llm_relevance(documents, preserve_source_types=protected)

    kept_ids = [item["doc_id"] for item in filtered]
    assert kept_ids == ["top", "seed"]


def test_build_document_results_returns_related_docs_from_meta_versions():
    """Related docs and source URL stored on the version record reach the result."""
    engine = _make_engine()
    attachment = {
        "doc_id": "govcrawler_article_1_attachment_2",
        "title": "attachment.pdf",
        "relation_type": "附件",
    }
    related_docs = [attachment]
    version = {
        "doc_id": "govcrawler_article_1",
        "content_hash": "hash-1",
        "title": "Article",
        "acl_ids": [],
        "related_docs": related_docs,
        "source_url": "https://www.example.gov.cn/post.html",
    }
    group = {
        "content_hash": "hash-1",
        "doc_ids": ["govcrawler_article_1"],
        "title": "Article",
        "score": 1.0,
    }

    results = engine._build_document_results(
        content_groups=[group],
        versions_map={"hash-1": [version]},
        perm=_make_perm(),
    )

    first = results[0]
    assert first["related_docs"] == related_docs
    assert first["source_url"] == "https://www.example.gov.cn/post.html"


@pytest.mark.asyncio
async def test_search_llm_filters_after_final_documents_are_built():
    """LLM relevance filtering trims the final documents but not the ES total."""

    def _hit(score, content_hash, doc_id, title):
        # Minimal ES hit shape the engine expects, including empty inner hits.
        return {
            "_score": score,
            "_source": {
                "content_hash": content_hash,
                "doc_ids": [doc_id],
                "title": title,
            },
            "inner_hits": {"best_chunks": {"hits": {"hits": []}}},
        }

    es_client = SimpleNamespace(
        should_use_hybrid=False,
        raw=SimpleNamespace(search=AsyncMock()),
    )
    es_client.raw.search.return_value = {
        "hits": {
            "hits": [
                _hit(2.0, "hash-top", "doc-top", "Top"),
                _hit(1.0, "hash-kept", "doc-kept", "Kept"),
                _hit(0.49, "hash-drop", "doc-drop", "Drop"),
            ]
        },
        "aggregations": {"total_content": {"value": 3}},
    }
    engine = SearchEngine(es_client=es_client, embedding_service=MagicMock())
    engine._fetch_versions = AsyncMock(return_value={})

    result = await engine.search(
        "policy",
        _make_perm(),
        page=1,
        page_size=3,
        llm=True,
    )

    surviving = [doc["doc_id"] for doc in result["documents"]]
    assert surviving == ["doc-top", "doc-kept"]
    # The reported total still reflects the aggregation, not the filtered list.
    assert result["total"] == 3
    sent_body = es_client.raw.search.await_args.kwargs["body"]
    assert sent_body["from"] == 0
    assert sent_body["size"] == 3


@pytest.mark.asyncio
async def test_search_non_llm_keeps_existing_score_threshold(monkeypatch):
    """Without LLM filtering, the configured score threshold still applies."""
    monkeypatch.setattr(search_engine_module.settings, "search_score_threshold", 0.5)

    def _hit(score, content_hash, doc_id, title):
        # Minimal ES hit shape the engine expects, including empty inner hits.
        return {
            "_score": score,
            "_source": {
                "content_hash": content_hash,
                "doc_ids": [doc_id],
                "title": title,
            },
            "inner_hits": {"best_chunks": {"hits": {"hits": []}}},
        }

    es_client = SimpleNamespace(
        should_use_hybrid=False,
        raw=SimpleNamespace(search=AsyncMock()),
    )
    es_client.raw.search.return_value = {
        "hits": {
            "hits": [
                _hit(0.5, "hash-kept", "doc-kept", "Kept"),
                _hit(0.49, "hash-drop", "doc-drop", "Drop"),
            ]
        },
        "aggregations": {"total_content": {"value": 2}},
    }
    engine = SearchEngine(es_client=es_client, embedding_service=MagicMock())
    engine._fetch_versions = AsyncMock(return_value={})

    result = await engine.search(
        "policy",
        _make_perm(),
        page=1,
        page_size=2,
        llm=False,
    )

    surviving = [doc["doc_id"] for doc in result["documents"]]
    assert surviving == ["doc-kept"]
    assert result["total"] == 1


def test_extract_aggregations_includes_category_buckets():
    """Category buckets use the unique-content count and skip empty keys."""
    engine = _make_engine()
    raw_response = {
        "aggregations": {
            "by_category": {
                "buckets": [
                    {"key": "政策文件", "doc_count": 3, "unique_content": {"value": 2}},
                    {"key": "", "doc_count": 1, "unique_content": {"value": 1}},
                ]
            }
        }
    }

    aggregations = engine._extract_aggregations(raw_response)

    assert aggregations["by_category"] == [{"key": "政策文件", "count": 2}]


def test_extract_aggregations_includes_subject_word_buckets():
    """Subject-word buckets use the unique-content count and skip empty keys."""
    engine = _make_engine()
    raw_response = {
        "aggregations": {
            "by_subject_words": {
                "buckets": [
                    {"key": "数字政府", "doc_count": 5, "unique_content": {"value": 3}},
                    {"key": "", "doc_count": 1, "unique_content": {"value": 1}},
                ]
            }
        }
    }

    aggregations = engine._extract_aggregations(raw_response)

    assert aggregations["by_subject_words"] == [{"key": "数字政府", "count": 3}]
