o
    {qiJ                     @   s2  d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZmZ ddlmZ defddZeeZ ej!ddde"fddZ#defddZ$G dd de%eZ&G dd deZ'G dd de'eZ(G dd de'eZ)dS )z@A chain for comparing the output of two models using embeddings.    N)Enum)util)AnyOptional)	Callbacks)AsyncCallbackManagerForChainRunCallbackManagerForChainRun)
Embeddings)pre_init)
ConfigDictField)Chain)PairwiseStringEvaluatorStringEvaluatorRUN_KEYreturnc               
   C   s6   zdd l } W | S  ty } zd}t||d }~ww )Nr   z@Could not import numpy, please install with `pip install numpy`.)numpyImportError)npemsg r   s/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/langchain/evaluation/embedding_distance/base.py_import_numpy   s   

r      )maxsizec                   C   s    t tdr	dS td dS )Nr   Ta  NumPy not found in the current Python environment. langchain will use a pure Python implementation for embedding distance operations, which may significantly impact performance, especially for large datasets. For optimal speed and efficiency, consider installing NumPy: pip install numpyF)boolr   	find_specloggerwarningr   r   r   r   _check_numpy#   s   r!   c                  C   sb   z
ddl m}  W |  S  ty0   zddlm}  W Y |  S  ty/ } zd}t||d}~ww w )zaCreate an Embeddings object.
    Returns:
        Embeddings: The created Embeddings object.
    r   OpenAIEmbeddingstCould not import OpenAIEmbeddings. Please install the OpenAIEmbeddings package using `pip install langchain-openai`.N)langchain_openair#   r   %langchain_community.embeddings.openai)r#   r   r   r   r   r   _embedding_factory1   s   	
r'   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )EmbeddingDistancea  Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    ZcosineZ	euclideanZ	manhattanZ	chebyshevZhammingN)	__name__
__module____qualname____doc__COSINE	EUCLIDEAN	MANHATTAN	CHEBYSHEVHAMMINGr   r   r   r   r(   I   s    
r(   c                   @   s6  e Zd ZU dZeedZeed< ee	j
dZe	ed< edeeef deeef fdd	Zed
dZedee fddZdedefddZde	defddZedededefddZedededefddZedededefddZedededefddZedededefddZd edefd!d"Zd#S )$_EmbeddingDistanceChainMixina0  Shared functionality for embedding distance evaluators.

    Attributes:
        embeddings (Embeddings): The embedding objects to vectorize the outputs.
        distance_metric (EmbeddingDistance): The distance metric to use
                                            for comparing the embeddings.
    )default_factory
embeddings)defaultdistance_metricvaluesr   c              
   C   s   | d}g }zddlm} || W n	 ty   Y nw zddlm} || W n	 ty4   Y nw |s=d}t|t|t|r_zddl}W |S  ty^ } zd}t||d}~ww |S )zValidate that the TikTok library is installed.

        Args:
            values (Dict[str, Any]): The values to validate.

        Returns:
            Dict[str, Any]: The validated values.
        r4   r   r"   r$   NzThe tiktoken library is required to use the default OpenAI embeddings with embedding distance evaluators. Please either manually select a different Embeddings object or install tiktoken using `pip install tiktoken`.)	getr%   r#   appendr   r&   
isinstancetupletiktoken)clsr7   r4   types_r#   r   r<   r   r   r   r   _validate_tiktoken_installedg   s:   


	
z9_EmbeddingDistanceChainMixin._validate_tiktoken_installedT)Zarbitrary_types_allowedc                 C   s   dgS )zgReturn the output keys of the chain.

        Returns:
            List[str]: The output keys.
        scorer   selfr   r   r   output_keys   s   z(_EmbeddingDistanceChainMixin.output_keysresultc                 C   s$   d|d i}t |v r|t  |t < |S )Nr@   r   )rB   rD   parsedr   r   r   _prepare_output   s   z,_EmbeddingDistanceChainMixin._prepare_outputmetricc              
   C   sN   t j| jt j| jt j| jt j| jt j	| j
i}||v r|| S d| }t|)zGet the metric function for the given metric name.

        Args:
            metric (EmbeddingDistance): The metric name.

        Returns:
            Any: The metric function.
        zInvalid metric: )r(   r-   _cosine_distancer.   _euclidean_distancer/   _manhattan_distancer0   _chebyshev_distancer1   _hamming_distance
ValueError)rB   rG   Zmetricsr   r   r   r   _get_metric   s   

z(_EmbeddingDistanceChainMixin._get_metricabc              
   C   sF   zddl m} W n ty } zd}t||d}~ww d|| | S )zCompute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        r   )cosine_similarityzThe cosine_similarity function is required to compute cosine distance. Please install the langchain-community package using `pip install langchain-community`.Ng      ?)Zlangchain_community.utils.mathrQ   r   )rO   rP   rQ   r   r   r   r   r   rH      s   
z-_EmbeddingDistanceChainMixin._cosine_distancec                 C   s:   t  rddl}|j| | S tdd t| |D d S )zCompute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        r   Nc                 s   s$    | ]\}}|| ||  V  qd S Nr   .0xyr   r   r   	<genexpr>   s   " zC_EmbeddingDistanceChainMixin._euclidean_distance.<locals>.<genexpr>g      ?)r!   r   ZlinalgZnormsumziprO   rP   r   r   r   r   rI      s   z0_EmbeddingDistanceChainMixin._euclidean_distancec                 C   8   t  rt }||| | S tdd t| |D S )zCompute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        c                 s        | ]\}}t || V  qd S rR   absrS   r   r   r   rW          zC_EmbeddingDistanceChainMixin._manhattan_distance.<locals>.<genexpr>)r!   r   rX   r^   rY   rZ   r   r   r   rJ         z0_EmbeddingDistanceChainMixin._manhattan_distancec                 C   r[   )zCompute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        c                 s   r\   rR   r]   rS   r   r   r   rW     r_   zC_EmbeddingDistanceChainMixin._chebyshev_distance.<locals>.<genexpr>)r!   r   maxr^   rY   rZ   r   r   r   rK      r`   z0_EmbeddingDistanceChainMixin._chebyshev_distancec                 C   s:   t  rt }|| |kS tdd t| |D t|  S )zCompute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        c                 s   s     | ]\}}||krd V  qdS )r   Nr   rS   r   r   r   rW     r_   zA_EmbeddingDistanceChainMixin._hamming_distance.<locals>.<genexpr>)r!   r   meanrX   rY   lenrZ   r   r   r   rL   	  s    z._EmbeddingDistanceChainMixin._hamming_distancevectorsc                 C   sh   |  | j}t r't|t jr'||d dd|d dd }t|S ||d |d }t|S )zCompute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            float: The computed score.
        r   r   )	rN   r6   r!   r:   r   ZndarrayZreshapeitemfloat)rB   rd   rG   r@   r   r   r   _compute_score  s   	&z+_EmbeddingDistanceChainMixin._compute_scoreN) r)   r*   r+   r,   r   r'   r4   r	   __annotations__r(   r-   r6   r
   dictstrr   r?   r   Zmodel_configpropertylistrC   rF   rN   staticmethodrH   rI   rJ   rK   rL   rg   rh   r   r   r   r   r2   [   s0   
 $/r2   c                   @   sF  e Zd ZdZedefddZedefddZede	e fddZ
		dd
eeef dee deeef fddZ		dd
eeef dee deeef fddZd	d	d	d	dddedee dedee	e  deeeef  dededefddZd	d	d	d	dddedee dedee	e  deeeef  dededefddZd	S )EmbeddingDistanceEvalChaina"  Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    r   c                 C   s   dS )zReturn whether the chain requires a reference.

        Returns:
            bool: True if a reference is required, False otherwise.
        Tr   rA   r   r   r   requires_reference6  s   z-EmbeddingDistanceEvalChain.requires_referencec                 C      d| j j dS )NZ
embedding_	_distancer6   valuerA   r   r   r   evaluation_name?     z*EmbeddingDistanceEvalChain.evaluation_namec                 C      ddgS )eReturn the input keys of the chain.

        Returns:
            List[str]: The input keys.
        
prediction	referencer   rA   r   r   r   
input_keysC     z%EmbeddingDistanceEvalChain.input_keysNinputsrun_managerc                 C   @   | j |d |d g}t rt }||}| |}d|iS )a0  Compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (Optional[CallbackManagerForChainRun], optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        ry   rz   r@   r4   Zembed_documentsr!   r   arrayrh   rB   r}   r~   rd   r   r@   r   r   r   _callL  s   

z EmbeddingDistanceEvalChain._callc                    H   | j |d |d gI dH }t rt }||}| |}d|iS )a:  Asynchronously compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        ry   rz   Nr@   r4   Zaembed_documentsr!   r   r   rh   r   r   r   r   _acalld     


z!EmbeddingDistanceEvalChain._acallF)rz   	callbackstagsmetadatainclude_run_infory   rz   r   r   r   r   kwargsc          	      K   "   | ||d||||d}|  |S )a  Evaluate the embedding distance between a prediction and
        reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required)
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        ry   rz   r}   r   r   r   r   rF   	rB   ry   rz   r   r   r   r   r   rD   r   r   r   _evaluate_strings  s   
z,EmbeddingDistanceEvalChain._evaluate_stringsc          	         ,   | j ||d||||dI dH }| |S )a  Asynchronously evaluate the embedding distance between
        a prediction and reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   NZacallrF   r   r   r   r   _aevaluate_strings  s   
z-EmbeddingDistanceEvalChain._aevaluate_stringsrR   )r)   r*   r+   r,   rl   r   rp   rk   ru   rm   r{   rj   r   r   r   r   r   r   r   r   r   r   r   r   r   ro   +  s    







	

&
	
ro   c                   @   s(  e Zd ZdZedee fddZedefddZ	dde	ee
f d	ee de	ee
f fd
dZ	dde	ee
f d	ee de	ee
f fddZddddddedededeee  dee	ee
f  dede
de	fddZddddddedededeee  dee	ee
f  dede
de	fddZdS )"PairwiseEmbeddingDistanceEvalChaina  Use embedding distances to score semantic difference between two predictions.

    Examples:
    >>> chain = PairwiseEmbeddingDistanceEvalChain()
    >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
    >>> print(result)
    {'score': 0.5}
    r   c                 C   rw   )rx   ry   prediction_br   rA   r   r   r   r{     r|   z-PairwiseEmbeddingDistanceEvalChain.input_keysc                 C   rq   )NZpairwise_embedding_rr   rs   rA   r   r   r   ru     rv   z2PairwiseEmbeddingDistanceEvalChain.evaluation_nameNr}   r~   c                 C   r   )a  Compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        ry   r   r@   r   r   r   r   r   r     s   

z(PairwiseEmbeddingDistanceEvalChain._callc                    r   )a/  Asynchronously compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        ry   r   Nr@   r   r   r   r   r   r     r   z)PairwiseEmbeddingDistanceEvalChain._acallF)r   r   r   r   ry   r   r   r   r   r   r   c          	      K   r   )a  Evaluate the embedding distance between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        ry   r   r   r   	rB   ry   r   r   r   r   r   r   rD   r   r   r   _evaluate_string_pairs  s   
z9PairwiseEmbeddingDistanceEvalChain._evaluate_string_pairsc          	         r   )a  Asynchronously evaluate the embedding distance

        between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces
            metadata (Dict[str, Any], optional): metadata to apply to traces
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        r   r   Nr   r   r   r   r   _aevaluate_string_pairs7  s   
z:PairwiseEmbeddingDistanceEvalChain._aevaluate_string_pairsrR   )r)   r*   r+   r,   rl   rm   rk   r{   ru   rj   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s    	





 
	

(
	
r   )*r,   	functoolsloggingenumr   	importlibr   typingr   r   Zlangchain_core.callbacksr   Z langchain_core.callbacks.managerr   r   Zlangchain_core.embeddingsr	   Zlangchain_core.utilsr
   Zpydanticr   r   Zlangchain.chains.baser   Zlangchain.evaluation.schemar   r   Zlangchain.schemar   r   	getLoggerr)   r   	lru_cacher   r!   r'   rk   r(   r2   ro   r   r   r   r   r   <module>   s8    
	
 Q 
