
    i2                       d Z ddlmZ ddlmZmZ ddlmZmZm	Z	 ddl
mZmZmZmZ ddlmZ ddlmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'  e$e(          Z) eddg          Z*d0dZ+d1dZ,e*-                    de          d2d'            Z.e*/                    d(          d3d*            Z0e*-                    d+e          d4d.            Z1d/S )5u  Search endpoints – hybrid BM25 + vector search with permission filtering.

混合检索接口模块。
支持 BM25 文本检索与 kNN 向量检索的 RRF 融合排序，
内置 ACL 权限过滤、content_hash 去重、多维度 facet 筛选及结果缓存。
    )annotations)	AnnotatedAny)	APIRouterDependsRequest)UserContextget_current_userget_es_clientget_redis_clientsettings)DocumentResultSearchRequestSearchResponseSuggestRequestSuggestResponse)EmbeddingService)PermissionService)SearchEngine)EmbeddingClient)ESClient)RedisClient)
get_logger)get_cached_searchset_cached_searchz/searchsearch)prefixtagsrequestr   returnr   c                   K   | j         j        j        }| j         j        j        }| j         j        j        }t          ||j                  }t          ||          S )u  Resolve the SearchEngine from app.state clients.

    Must be async so FastAPI runs it in the event loop (not a thread pool).
    This matters because EmbeddingService.__init__ creates asyncio.Semaphore,
    which requires a running event loop in Python 3.9.

    EmbeddingService 接收 Redis 实例以启用向量缓存（TTL 1h），
    避免相同查询重复调用外部 Embedding API。
    )redis)	es_clientembedding_service)appstater$   embedding_clientredis_clientr   rawr   )r    r$   r(   r)   r%   s        +D:\work\zm-rag\backend\app\api\v1\search.py_get_search_enginer,   %   s[       "++5I(/(9(J ' 1 >L()9AQRRR)?PQQQQ    r   c                H   K   | j         j        j        }t          |          S )z5Resolve the PermissionService from app.state clients.)r)   )r&   r'   r)   r   )r    r)   s     r+   _get_permission_servicer/   6   s$       ' 1 >L,7777r-    )response_modelbodyr   user1Annotated[UserContext, Depends(get_current_user)]search_engine4Annotated[SearchEngine, Depends(_get_search_engine)]perm_service>Annotated[PermissionService, Depends(_get_permission_service)]r)   1Annotated[RedisClient, Depends(get_redis_client)]r   c           
       K   |                     |           d{V }t                              d|j        | j        dd         | j                   i }| j        r| j        j        r| j        j        |d<   | j        j        r| j        j        |d<   | j        j	        r| j        j	        |d<   | j        j
        r| j        j
        |d<   | j        j        r?| j        j                                        r!| j        j                                        |d	<   | j        j        r| j        j        |d
<   | j        j        r?| j        j                                        r!| j        j                                        |d<   | j        j        rTt!          | j        j        d          r| j        j                                        nt%          | j        j                  |d<   | j        j        rTt!          | j        j        d          r| j        j                                        nt%          | j        j                  |d<   | j        j        r| j        j        |d<   | j        j        dk    r| j        j        |d<   ddlm} d}|j        dk    r-t3          |j        |j        | j        || j                   d{V }|r2t                              d|j        | j        dd                    |}	nh|                    | j        ||| j        | j                   d{V }	|j        dk    r.t=          |j        |j        | j        || j        |	           d{V  d |	                    dg           D             }
tA          |	                    dtC          |
                    | j        | j        |
|	                    di                     S )a0  Execute a hybrid (BM25 + kNN) search with RRF fusion.

    - Applies ACL permission filter (acl_ids terms match)
    - Groups results by content_hash for deduplication
    - Selects the best version per content_hash (most direct permission match)
        - Supports facet filters (issuing_org, doc_type, knowledge_category, document_scene_type,
            signer, publish_year, doc_number, date_range, subject_words)
    - Caches search results per user (TTL 2 min) for performance
    - Returns highlighted snippets, version_count, and aggregation stats
    Nsearch_request2   )user_idquerypageissuing_orgdoc_typeknowledge_categorydocument_scene_typesignerpublish_year
doc_number	isoformat	date_fromdate_tosubject_wordsallsearch_scoper   r   search_cache_hit(   )r=   r>   )r>   permfiltersr?   	page_sizec                &   g | ]}t          |d          |                    dd          |                    dd          |                    dd          |                    d          |                    d          |                    d          |                    d	          |                    d
          |                    d          |                    d          |                    d          |                    dg                     S )doc_idcontent_hashr0   version_count   titlerF   rB   r@   rA   publish_datescore
bm25_score	knn_score
highlights)rS   rT   rU   rW   rF   rB   r@   rA   rX   rY   rZ   r[   r\   )r   get).0docs     r+   
<listcomp>z!hybrid_search.<locals>.<listcomp>   s          	x=44''/155'''2&&ww|,,"ww';<<..WWZ((00'''""ww|,,ggk**ww|R00	
 	
 	
  r-   	documentstotalaggregations)rb   r?   rQ   ra   rc   )"resolveloggerinfor=   r>   r?   rP   r@   rA   rB   rC   rD   striprE   rF   rH   hasattrrG   strrI   rJ   rL   
app.configr   search_cache_ttlr   r*   debugr   rQ   r   r]   r   len)r2   r3   r5   r7   r)   rO   rP   _scached_resultresultra   s              r+   hybrid_searchrq   <   s6     & %%d++++++++D
KKj"oY	     !G| @<# 	>%)\%=GM"<  	8"&,"7GJ<* 	L,0L,KG()<+ 	N-1\-MG)*< 	<4<#6#<#<#>#> 	< $ 3 9 9 ; ;GH<$ 	@&*l&?GN#<" 	Dt|'>'D'D'F'F 	D$(L$;$A$A$C$CGL!<! 	 4<1;??1&00222/00 K 
 < 	 4</==/$..000-.. I
 <% 	B'+|'AGO$<$--&*l&?GN# *)))))M	Q/dlDJ
 
 
 
 
 
 
 
  'TZPSQSPS_UUU %++*n , 
 
 
 
 
 
 
 
 ""# $,
GTYPV        
   ::k2..!  I& jj#i..11Y.ZZ33   r-   z/filter-optionsdict[str, Any]c                  K   ddl m}  |            }|j        j        j        |                    |            d{V }d |j        D             }d |j        D             }g }dfd}		 |                                 |	t          j
        d           d{V }
t          d |
D                       s |	t          j        d           d{V }
t          d |
D             d           dd         }d |D             }n@# t          $ r3}t                              dt#          |                     Y d}~nd}~ww xY w||ddddddg|dS )u  Return available filter options for the search UI.

    Exposes ``knowledge_categories`` and ``doc_types`` from graph_schema.yaml,
    plus top ``subject_words`` from accessible documents, so the frontend
    doesn't hardcode common filter options.

    从 graph_schema.yaml 读取知识分类和公文种类列表，
    并从用户有权访问的已完成文档中聚合常用主题词，供前端搜索筛选面板动态渲染，避免硬编码。
    r   )
get_schemaNc                    g | ]}||d S labelvalue )r^   names     r+   r`   z"filter_options.<locals>.<listcomp>   s0        &&  r-   c                    g | ]}||d S rv   rz   )r^   dts     r+   r`   z"filter_options.<locals>.<listcomp>   s0        r""  r-   indexri   #include_completed_or_missing_statusboolr!   list[dict[str, Any]]c                 K   g}|r)|                     ddddiidddddiigiigdd	i           j                            | d
dd|iiddddddidiid           d {V }t          |t                    r|n|j        }|                    di                               di                               dg           S )Nr   termstatus	completedmust_notexistsfieldrV   )shouldminimum_should_matchr   filterrJ   termsr<   _countdescr   sizeorderr   r>   aggsr~   r2   rc   buckets)appendr*   r   
isinstancedictr2   r]   )r~   r   query_filtersresponser*   
acl_filterr$   s        r+   _search_subject_word_bucketsz4filter_options.<locals>._search_subject_word_buckets   sL     
 0:l. 	  #h%<=#jHw>Q3R2S%TU# 12 
 
 
 #-- - $%4$&&.%7" "&  . 
 
 
 
 
 
 
 
( %Xt44Ghh(-ww~r**..CCGG	SUVVVr-   T)r   c              3     K   | ]:}t          |                    d           pd                                          V  ;dS keyr0   Nri   r]   rg   r^   buckets     r+   	<genexpr>z!filter_options.<locals>.<genexpr>  sG      NNF3vzz%((.B//5577NNNNNNr-   Fc              3     K   | ]<}t          |                    d           pd                                          8|V  =dS r   r   r   s     r+   r   z!filter_options.<locals>.<genexpr>  sa        !vzz%((.B//5577     r-   c                    t          |                     d          pd           t          |                     d          pd          fS )N	doc_countr   r   r0   )intr]   ri   )r   s    r+   <lambda>z filter_options.<locals>.<lambda>  sB    VZZ%<%<%A!B!B BC

SXHYHYH_]_D`D`a r-   r      c                0    g | ]}|d          |d          dS )r   rw   rz   r   s     r+   r`   z"filter_options.<locals>.<listcomp>  s8     
 
 
 UmfUm<<
 
 
r-   *search_filter_options_subject_words_failederroru   标准办事指南standard_service_guiderw   u   其他other)knowledge_categories	doc_typesdocument_scene_typesrJ   )r~   ri   r   r   r!   r   )app.core.graph_schema_loaderrt   r&   r'   r$   rd   knowledge_category_mappingr   build_es_filterr   es_meta_indexanyes_chunk_indexsorted	Exceptionre   warningri   )r3   r    r7   rt   schemarO   
categoriesr   rJ   r   r   ranked_subject_wordsexcr   r$   s                @@r+   filter_optionsr      st      877777Z\\F!++5I%%d++++++++D 5  J "  I
 +-M(W (W (W (W (W (W (WTU))++
44"04
 
 
 
 
 
 
 
 
 NNgNNNNN 	88'49        G
  & %,   ba 
  
  
 2# 
 
.
 
 
  U U UC3s88TTTTTTTTU !+*5MNN11!
 '  s   -BD 
E)D==Ez/suggestr   r   c                  K   |j         j        j        }t          |           d{V }|                    |           d{V }|                                }	 |j                            t          j	        dddd| j
        ddiig|gdid	d
d| j        ddidiid           d{V }t          |t                    r|n|j        }|                    di                               d	i                               dg           }	d |	D             }
nB# t           $ r5}t"                              dt'          |                     g }
Y d}~nd}~ww xY wt)          |
          S )zReturn query auto-complete suggestions based on document titles.

    Uses ES prefix/completion suggestion on the title field,
    filtered by the user's permissions.
    Nr   r   match_phrase_prefixrW   r   )r>   max_expansions)mustr   title_suggestionsr   ztitle.keywordr   r   r   r   r   rc   r   c                    g | ]
}|d          S r   rz   )r^   bs     r+   r`   zsuggest.<locals>.<listcomp>T  s    111Aqx111r-   suggest_errorr   )suggestions)r&   r'   r$   r/   rd   r   r*   r   r   r   r>   r   r   r   r2   r]   r   re   r   ri   r   )r2   r3   r    r$   r7   rO   r   r   r*   r   r   es               r+   suggestr      s      "++5I099999999L%%d++++++++D%%''J'"--( !6$+15:<.& .&8"	! $.,   (%4$(I&.%7" "*#  . 
 
 
 
 
 
 
 
> %Xt44Ghh(-''."--112ErJJNNyZ\]]11111   c!ff555 {3333s   B:D 
E+EEN)r    r   r!   r   )r    r   r!   r   )r2   r   r3   r4   r5   r6   r7   r8   r)   r9   r!   r   )r3   r4   r    r   r7   r8   r!   rr   )r2   r   r3   r4   r    r   r!   r   )2__doc__
__future__r   typingr   r   fastapir   r   r   app.api.depsr	   r
   r   r   rj   r   app.api.schemas.searchr   r   r   r   r   app.core.embeddingr   app.core.permissionr   app.core.search_enginer   #app.infrastructure.embedding_clientr   app.infrastructure.es_clientr   app.infrastructure.redis_clientr   app.utils.loggerr   app.utils.query_cacher   r   __name__re   routerr,   r/   postrq   r]   r   r   rz   r-   r+   <module>r      sb    # " " " " " ! ! ! ! ! ! ! ! / / / / / / / / / / W W W W W W W W W W W W                    0 / / / / / 1 1 1 1 1 1 / / / / / / ? ? ? ? ? ? 1 1 1 1 1 1 7 7 7 7 7 7 ' ' ' ' ' ' F F F F F F F F	H			)8*	5	5	5R R R R"8 8 8 8 R//r r r 0/rj j j j jZ Z8894 94 94 9894 94 94r-   