"""Pydantic models for the search API.

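Request/response data models for the hybrid search API.
Defines the search filters, search request, document results, and search response,
as well as the request and response structures for autocomplete suggestions.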
"""

from __future__ import annotations

from datetime import date
from typing import Any, Literal


from pydantic import BaseModel, ConfigDict, Field

# Allowed search scopes: all fields / title only / body content only /
# document number only. Declared after the imports so that every module-level
# import precedes executable code (PEP 8, ruff E402).
SearchScope = Literal["all", "title", "content", "doc_number"]


class SearchFilters(BaseModel):
    """Optional facet filters applied to a search request."""

    # Every filter is optional; all but ``search_scope`` default to ``None``.

    # --- Facets accepting either a single string or a list of strings ---
    # Issuing organisation(s).
    issuing_org: list[str] | str | None = Field(
        default=None,
        description="发文机关（单个或多个）",
    )
    # Official document type(s).
    doc_type: list[str] | str | None = Field(
        default=None,
        description="公文种类（单个或多个）",
    )
    # Knowledge category / business category.
    knowledge_category: list[str] | str | None = Field(
        default=None,
        description="知识分类/业务分类",
    )
    # Document scene type, e.g. "standard_service_guide".
    document_scene_type: list[str] | str | None = Field(
        default=None,
        description="文档场景类型，如 standard_service_guide",
    )

    # --- Single-valued text / numeric filters ---
    # Signer filter; the description states a full or partial name is accepted
    # (the matching itself is implemented outside this model).
    signer: str | None = Field(
        default=None,
        description="签发人筛选，支持输入完整或部分签发人",
    )
    # Year of the publish date; validated to lie within 1900..2100 inclusive.
    publish_year: int | None = Field(
        default=None,
        ge=1900,
        le=2100,
        description="发布日期年份，如 2024",
    )
    # Document-number filter; the description states a full or partial number
    # is accepted.
    doc_number: str | None = Field(
        default=None,
        description="文号筛选，支持输入完整或部分文号",
    )

    # --- Publish-date range ---
    # NOTE(review): no cross-field check enforces date_from <= date_to here.
    date_from: date | None = Field(default=None, description="发布日期起始")
    date_to: date | None = Field(default=None, description="发布日期截止")

    # List of subject words.
    subject_words: list[str] | None = Field(
        default=None,
        description="主题词列表",
    )
    # Which part of a document the query targets; defaults to "all".
    search_scope: SearchScope = Field(
        default="all",
        description="检索范围：综合/标题/正文/文号",
    )


class SearchRequest(BaseModel):
    """Payload for ``POST /search``."""

    # Lets aliased fields (``escape_html`` / ``escapeHtml``) be populated by
    # either the Python field name or the alias.
    model_config = ConfigDict(populate_by_name=True)

    # Required search keywords or natural-language question, 1..500 characters.
    query: str = Field(
        ...,
        min_length=1,
        max_length=500,
        description="搜索关键词或自然语言问句",
    )
    # Optional facet filters; ``None`` when the caller supplies none.
    filters: SearchFilters | None = None

    # Pagination: ``page`` must be >= 1, ``page_size`` must be within 1..100.
    page: int = Field(default=1, ge=1)
    page_size: int = Field(default=20, ge=1, le=100)

    # Off by default.
    llm: bool = Field(
        default=False,
        description="Enable stricter relevance filtering for LLM context.",
    )
    # On by default; exposed under the camelCase alias ``escapeHtml``.
    escape_html: bool = Field(
        default=True,
        alias="escapeHtml",
        description="Whether to strip HTML highlight tags from returned text.",
    )


class MatchedChunk(BaseModel):
    """A single matched chunk with positional info."""

    # Highlighted snippet text; empty string when not provided.
    text: str = Field(default="", description="高亮摘要文本")

    # Page the chunk is located on, if known.
    page_number: int | None = Field(default=None, description="所在页码")
    # Page numbers covered when the chunk spans pages; a fresh empty list by default.
    page_numbers: list[int] = Field(
        default_factory=list,
        description="跨页页码列表",
    )

    # Heading hierarchy of the paragraph; a fresh empty list by default.
    heading_hierarchy: list[str] = Field(
        default_factory=list,
        description="段落层级标题",
    )
    # Element type such as paragraph, table or list; empty string by default.
    element_type: str = Field(
        default="",
        description="元素类型，如 paragraph, table, list 等",
    )
    # Sequence number of the chunk, if known.
    chunk_index: int | None = Field(default=None, description="块序号")


class DocumentResult(BaseModel):
    """A single document hit returned in the search response.

    Each result represents a unique piece of content (``content_hash``).
    ``doc_id`` is the best-matching version for the current user.
    ``version_count`` indicates how many document versions share this content.
    """

    # --- Identity ---
    # ``doc_id`` and ``title`` (below) are the only required fields.
    doc_id: str
    # Content identifier shared by documents with identical content.
    content_hash: str = Field(
        default="",
        description="内容标识（相同内容共享）",
    )
    # Number of same-content document versions visible to the user; defaults to 1.
    version_count: int = Field(
        default=1,
        description="用户可见的同内容文档版本数",
    )

    # --- Descriptive metadata (optional except ``title``) ---
    title: str
    doc_number: str | None = None
    knowledge_category: str | None = None
    issuing_org: str | None = None
    doc_type: str | None = None
    # Kept as a string here; the expected date format is not defined in this
    # model — TODO confirm with the producer of these results.
    publish_date: str | None = None

    # --- Provenance ---
    # Identifiers and URL of the originating record; their exact semantics are
    # defined by the upstream source, not by this model.
    source_system: str | None = None
    source_article_id: str | None = None
    source_attachment_id: str | None = None
    source_site_code: str | None = None
    source_target_code: str | None = None
    source_url: str | None = None
    # Free-form mappings; each instance gets its own empty container by default.
    source_metadata: dict[str, Any] = Field(default_factory=dict)
    related_docs: list[dict[str, Any]] = Field(default_factory=list)

    # List of subject words.
    subject_words: list[str] = Field(
        default_factory=list,
        description="主题词列表",
    )

    # --- Scoring ---
    score: float | None = None
    # BM25 text-relevance score.
    bm25_score: float | None = Field(
        default=None,
        description="BM25 文本相关性分数",
    )
    # kNN vector-similarity score.
    knn_score: float | None = Field(
        default=None,
        description="kNN 向量相似度分数",
    )

    # --- Match details ---
    highlights: list[str] = Field(default_factory=list)
    # Details of the matched chunks, including page and paragraph information.
    matched_chunks: list[MatchedChunk] = Field(
        default_factory=list,
        description="命中的文档块详情（含页码、段落信息）",
    )


class SearchResponse(BaseModel):
    """Response envelope for ``POST /search``."""

    # ``total``, ``page``, ``page_size`` and ``documents`` are all required;
    # no range constraints are declared on the integers in this model.
    # Presumably ``total`` counts every hit rather than only those on this
    # page — TODO confirm against the endpoint implementation.
    total: int
    page: int
    page_size: int
    # The document hits carried by this response.
    documents: list[DocumentResult]
    # Free-form aggregation data keyed by string; defaults to a new empty dict
    # for each instance. The structure of the values is not defined here.
    aggregations: dict[str, Any] = Field(default_factory=dict)


class SuggestRequest(BaseModel):
    """Payload for ``POST /search/suggest``."""

    # Required text to request suggestions for, 1..200 characters.
    query: str = Field(
        ...,
        min_length=1,
        max_length=200,
    )
    # Validated to lie within 1..20; defaults to 5.
    size: int = Field(
        default=5,
        ge=1,
        le=20,
    )


class SuggestResponse(BaseModel):
    """Response envelope for ``POST /search/suggest``."""

    # Suggestion strings; defaults to a new empty list for each instance, so
    # the field is always present even when there is nothing to suggest.
    suggestions: list[str] = Field(default_factory=list)
