o
    `+ i#)                     @   s  d Z ddlZddlmZ ddlmZmZmZ ddlm	Z	 ddl
mZmZmZmZmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZ ddlmZmZ ddl m!Z! ddl"m#Z# edZ$ededZ%dee$ dee$ge%f dee$ fddZ&G dd deZ'dS )zo
Ensemble retriever that ensemble the results of
multiple retrievers by using weighted  Reciprocal Rank Fusion
    N)defaultdict)HashableIterableIterator)chain)AnyCallableOptionalTypeVarcast)#AsyncCallbackManagerForRetrieverRunCallbackManagerForRetrieverRun)Document)BaseRetrieverRetrieverLike)RunnableConfig)ensure_configpatch_config)ConfigurableFieldSpecget_unique_config_specs)model_validator)overrideTH)bounditerablekeyreturnc                 c   s6    t  }| D ]}|| }|vr|| |V  qdS )a  Yield unique elements of an iterable based on a key function.

    Args:
        iterable: The iterable to filter.
        key: A function that returns a hashable key for each element.

    Yields:
        Unique elements of the iterable based on the key function.
    N)setadd)r   r   seenek r#   i/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/langchain/retrievers/ensemble.pyunique_by_key%   s   

r%   c                   @   sz  e Zd ZU dZee ed< ee ed< dZe	ed< dZ
ee ed< edee fd	d
Zeddedeeef defddZe	d%dedee dedee fddZe	d%dedee dedee fddZdededee fddZdededee fddZdddededee dee fddZdddededee dee fd d!Zd"eee  dee fd#d$Z dS )&EnsembleRetrieverae  Retriever that ensembles the multiple retrievers.

    It uses a rank fusion.

    Args:
        retrievers: A list of retrievers to ensemble.
        weights: A list of weights corresponding to the retrievers. Defaults to equal
            weighting for all retrievers.
        c: A constant added to the rank, controlling the balance between the importance
            of high-ranked items and the consideration given to lower-ranked items.
            Default is 60.
        id_key: The key in the document's metadata used to determine unique documents.
            If not specified, page_content is used.
    
retrieversweights<   cNid_keyr   c                 C   s   t dd | jD S )z+List configurable fields for this runnable.c                 s   s     | ]}|j D ]}|V  qqd S N)config_specs).0	retrieverspecr#   r#   r$   	<genexpr>N   s    z1EnsembleRetriever.config_specs.<locals>.<genexpr>)r   r'   selfr#   r#   r$   r-   K   s   zEnsembleRetriever.config_specsbefore)modevaluesc                 C   s,   | dst|d }d| g| |d< |S )Nr(   r'      )getlen)clsr6   Zn_retrieversr#   r#   r$   set_weightsR   s   
zEnsembleRetriever.set_weightsinputconfigkwargsc           	   
   K   s   ddl m} t|}|j|dd |dd|dg | j|di | jd}|jd |fd	|d
p5|  i|}z
| j	|||d}W n t
yV } z||  d }~ww |j|fi | |S )Nr   )CallbackManager	callbacksverboseFtagsmetadatarA   Zinheritable_tagsZ
local_tagsZinheritable_metadataZlocal_metadatanamerun_namerun_managerr=   )langchain_core.callbacksr?   r   	configurer8   rB   rC   on_retriever_startget_namerank_fusion	Exceptionon_retriever_erroron_retriever_end)	r3   r<   r=   r>   r?   callback_managerrH   resultr!   r#   r#   r$   invokeZ   s@   


	
zEnsembleRetriever.invokec           	   
      s   ddl m} t|}|j|dd |dd|dg | j|di | jd}|jd |fd	|d
p6|  i|I d H }z| j	|||dI d H }W n t
y` } z	||I d H   d }~ww |j|fi |I d H  |S )Nr   )AsyncCallbackManagerr@   rA   FrB   rC   rD   rE   rF   rG   )rI   rT   r   rJ   r8   rB   rC   rK   rL   arank_fusionrN   rO   rP   )	r3   r<   r=   r>   rT   rQ   rH   rR   r!   r#   r#   r$   ainvoke   sJ   


	zEnsembleRetriever.ainvokequeryrH   c                C   s   |  ||S )z
        Get the relevant documents for a given query.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        )rM   r3   rW   rH   r#   r#   r$   _get_relevant_documents   s   z)EnsembleRetriever._get_relevant_documentsc                   s   |  ||I dH S )z
        Asynchronously get the relevant documents for a given query.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        N)rU   rX   r#   r#   r$   _aget_relevant_documents   s   z*EnsembleRetriever._aget_relevant_documents)r=   c                   sN    fddt | jD }tt|D ]}dd || D ||< q| |S )z
        Retrieve the results of the retrievers and use rank_fusion_func to get
        the final result.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        c                    6   g | ]\}}| t jd |d  ddqS Z
retriever_r7   )tag)r@   )rS   r   	get_childr.   ir/   r=   rW   rH   r#   r$   
<listcomp>       z1EnsembleRetriever.rank_fusion.<locals>.<listcomp>c                 S   s*   g | ]}t |trttt|d n|qS )page_content)
isinstancestrr   r   r.   docr#   r#   r$   rb      s    )	enumerater'   ranger9   weighted_reciprocal_rankr3   rW   rH   r=   Zretriever_docsr`   r#   ra   r$   rM      s   
zEnsembleRetriever.rank_fusionc                   s\   t j fddt| jD  I dH }tt|D ]}dd || D ||< q| |S )z
        Asynchronously retrieve the results of the retrievers
        and use rank_fusion_func to get the final result.

        Args:
            query: The query to search for.

        Returns:
            A list of reranked documents.
        c                    r[   r\   )rV   r   r^   r_   ra   r#   r$   rb   
  rc   z2EnsembleRetriever.arank_fusion.<locals>.<listcomp>Nc                 S   s$   g | ]}t |tst|d n|qS rd   )rf   r   rh   r#   r#   r$   rb     s    )asynciogatherrj   r'   rk   r9   rl   rm   r#   ra   r$   rU      s   

zEnsembleRetriever.arank_fusion	doc_listsc                    s   t |t jkrd}t|tt t|jD ](\}}t|ddD ]\}} jdu r0|jn|j	j   ||j
  7  < q#qt|}tt|fddd fddd	S )
a  
        Perform weighted Reciprocal Rank Fusion on multiple rank lists.
        You can find more details about RRF here:
        https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf

        Args:
            doc_lists: A list of rank lists, where each rank list contains unique items.

        Returns:
            list: The final aggregated list of items sorted by their weighted RRF
                    scores in descending order.
        z<Number of rank lists must be equal to the number of weights.r7   )startNc                    s    j d u r| jS | j j  S r,   r+   re   rC   ri   r2   r#   r$   <lambda>F  s   
z<EnsembleRetriever.weighted_reciprocal_rank.<locals>.<lambda>Tc                    s"    j d u r
| j S | jj   S r,   rr   rs   Z	rrf_scorer3   r#   r$   rt   M  s
    
)reverser   )r9   r(   
ValueErrorr   floatziprj   r+   re   rC   r*   r   from_iterablesortedr%   )r3   rp   msgZdoc_listweightZrankri   Zall_docsr#   ru   r$   rl      s.   




z*EnsembleRetriever.weighted_reciprocal_rankr,   )!__name__
__module____qualname____doc__listr   __annotations__rx   r*   intr+   r	   rg   propertyr   r-   r   classmethoddictr   r;   r   r   r   rS   rV   r   rY   r   rZ   rM   rU   rl   r#   r#   r#   r$   r&   6   s   
 $(


-
*
r&   )(r   rn   collectionsr   collections.abcr   r   r   	itertoolsr   typingr   r   r	   r
   r   rI   r   r   Zlangchain_core.documentsr   Zlangchain_core.retrieversr   r   Zlangchain_core.runnablesr   Zlangchain_core.runnables.configr   r   Zlangchain_core.runnables.utilsr   r   Zpydanticr   Ztyping_extensionsr   r   r   r%   r&   r#   r#   r#   r$   <module>   s$    (