o
    `+ iYH                     @  s  d Z ddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
 ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ eeZG dd dee
ZG dd deZG dd dZG dd deeZ G dd deeZ!G dd deeZ"dS )z3Interfaces to be implemented by general evaluators.    )annotationsN)ABCabstractmethod)Sequence)Enum)AnyOptionalUnion)warn)AgentAction)BaseLanguageModel)run_in_executor)Chainc                   @  s   e Zd ZdZdZ	 dZ	 dZ	 dZ	 dZ	 dZ		 dZ
	 d	Z	 d
Z	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZ	 dZdS )EvaluatorTypezThe types of the evaluators.ZqaZcot_qaZ
context_qaZpairwise_stringZscore_stringZlabeled_pairwise_stringZlabeled_score_stringZ
trajectorycriteriaZlabeled_criteriaZstring_distanceZexact_matchZregex_matchZpairwise_string_distanceZembedding_distanceZpairwise_embedding_distanceZjson_validityZjson_equalityZjson_edit_distanceZjson_schema_validationN)__name__
__module____qualname____doc__ZQAZCOT_QAZ
CONTEXT_QAZPAIRWISE_STRINGZSCORE_STRINGZLABELED_PAIRWISE_STRINGZLABELED_SCORE_STRINGZAGENT_TRAJECTORYZCRITERIAZLABELED_CRITERIAZSTRING_DISTANCEZEXACT_MATCHZREGEX_MATCHZPAIRWISE_STRING_DISTANCEZEMBEDDING_DISTANCEZPAIRWISE_EMBEDDING_DISTANCEZJSON_VALIDITYZJSON_EQUALITYZJSON_EDIT_DISTANCEZJSON_SCHEMA_VALIDATION r   r   g/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/langchain/evaluation/schema.pyr      sT    r   c                   @  s"   e Zd ZdZeed
ddZd	S )LLMEvalChainz,A base class for evaluators that use an LLM.llmr   kwargsr   returnc                 K     dS )z#Create a new evaluator from an LLM.Nr   )clsr   r   r   r   r   from_llmN       zLLMEvalChain.from_llmN)r   r   r   r   r   r   )r   r   r   r   classmethodr   r   r   r   r   r   r   K   s
    r   c                   @  sX   e Zd ZdZedddZedddZedd	d
ZedddZ		ddddZ	dS )_EvalArgsMixinz(Mixin for checking evaluation arguments.r   boolc                 C  r   z2Whether this evaluator requires a reference label.Fr   selfr   r   r   requires_referenceW      z!_EvalArgsMixin.requires_referencec                 C  r   )0Whether this evaluator requires an input string.Fr   r#   r   r   r   requires_input\   r&   z_EvalArgsMixin.requires_inputstrc                 C     d| j j dS )z&Warning to show when input is ignored.zIgnoring input in , as it is not expected.	__class__r   r#   r   r   r   _skip_input_warninga   s   z"_EvalArgsMixin._skip_input_warningc                 C  r*   )z*Warning to show when reference is ignored.zIgnoring reference in r+   r,   r#   r   r   r   _skip_reference_warningf   s   z&_EvalArgsMixin._skip_reference_warningN	referenceOptional[str]input_Nonec                 C  s   | j r|du r| jj d}t||dur | j s t| jdd | jr2|du r2| jj d}t||durB| jsDt| jdd dS dS dS )a  Check if the evaluation arguments are valid.

        Args:
            reference (Optional[str], optional): The reference label.
            input_ (Optional[str], optional): The input string.
        Raises:
            ValueError: If the evaluator requires an input string but none is provided,
                or if the evaluator requires a reference label but none is provided.
        Nz requires an input string.   )
stacklevelz requires a reference string.)r(   r-   r   
ValueErrorr
   r.   r%   r/   )r$   r0   r2   msgr   r   r   _check_evaluation_argsm   s   z%_EvalArgsMixin._check_evaluation_argsr   r!   r   r)   )NN)r0   r1   r2   r1   r   r3   )
r   r   r   r   propertyr%   r(   r.   r/   r8   r   r   r   r   r    T   s    r    c                   @  sx   e Zd ZdZedddZedddZed	d	d
dddZd	d	d
dddZ	d	d	d
dddZ
d	d	d
dddZd	S ) StringEvaluatorzcGrade, tag, or otherwise evaluate predictions relative to their inputs
    and/or reference labels.r   r)   c                 C  s   | j jS )zThe name of the evaluation.r,   r#   r   r   r   evaluation_name   s   zStringEvaluator.evaluation_namer!   c                 C  r   r"   r   r#   r   r   r   r%      r&   z"StringEvaluator.requires_referenceNr0   input
predictionUnion[str, Any]r0   Optional[Union[str, Any]]r?   r   r   dictc                K  r   )a:  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr   r$   r@   r0   r?   r   r   r   r   _evaluate_strings   r   z!StringEvaluator._evaluate_stringsc                  s$   t d| jf|||d|I dH S )aI  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
                It is recommended that the dictionary contain the following keys:
                     - score: the score of the evaluation, if applicable.
                     - value: the string value of the evaluation, if applicable.
                     - reasoning: the reasoning for the evaluation, if applicable.
        Nr@   r0   r?   )r   rE   rD   r   r   r   _aevaluate_strings   s   z"StringEvaluator._aevaluate_stringsr1   c                K  s&   | j ||d | jd|||d|S )a  Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        r0   r2   rF   Nr   )r8   rE   rD   r   r   r   evaluate_strings   s   z StringEvaluator.evaluate_stringsc                  s.   | j ||d | jd|||d|I dH S )a	  Asynchronously evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): The LLM or chain prediction to evaluate.
            reference (Optional[str], optional): The reference label to evaluate against.
            input (Optional[str], optional): The input to consider during evaluation.
            kwargs: Additional keyword arguments, including callbacks, tags, etc.
        Returns:
            dict: The evaluation results containing the score or value.
        rH   rF   Nr   )r8   rG   rD   r   r   r   aevaluate_strings   s   z!StringEvaluator.aevaluate_stringsr:   r9   )
r@   rA   r0   rB   r?   rB   r   r   r   rC   )
r@   r)   r0   r1   r?   r1   r   r   r   rC   )r   r   r   r   r;   r=   r%   r   rE   rG   rI   rJ   r   r   r   r   r<      s&    #r<   c                   @  s\   e Zd ZdZeddddddZddddddZddddddZddddddZdS )PairwiseStringEvaluatorzDCompare the output of two models (or two outputs of the same model).Nr>   r@   r)   prediction_br0   r1   r?   r   r   r   rC   c                K  r   )1  Evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr   r$   r@   rL   r0   r?   r   r   r   r   _evaluate_string_pairs  r   z.PairwiseStringEvaluator._evaluate_string_pairsc                  s&   t d| jf||||d|I dH S )@  Asynchronously evaluate the output string pairs.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            reference (Optional[str], optional): The expected output / reference string.
            input (Optional[str], optional): The input string.
            kwargs: Additional keyword arguments, such as callbacks and optional reference strings.
        Returns:
            dict: A dictionary containing the preference, scores, and/or other information.
        Nr@   rL   r0   r?   )r   rO   rN   r   r   r   _aevaluate_string_pairs  s   z/PairwiseStringEvaluator._aevaluate_string_pairsc                K  s(   | j ||d | jd||||d|S )rM   rH   rQ   Nr   )r8   rO   rN   r   r   r   evaluate_string_pairs8  s   z-PairwiseStringEvaluator.evaluate_string_pairsc                  s0   | j ||d | jd||||d|I dH S )rP   rH   rQ   Nr   )r8   rR   rN   r   r   r   aevaluate_string_pairsU  s   z.PairwiseStringEvaluator.aevaluate_string_pairs)r@   r)   rL   r)   r0   r1   r?   r1   r   r   r   rC   )	r   r   r   r   r   rO   rR   rS   rT   r   r   r   r   rK     s    #"rK   c                   @  sb   e Zd ZdZedddZedddddZdddddZdddddZ	dddddZ
dS )AgentTrajectoryEvaluatorz,Interface for evaluating agent trajectories.r   r!   c                 C  r   )r'   Tr   r#   r   r   r   r(   v  r&   z'AgentTrajectoryEvaluator.requires_inputN)r0   r@   r)   agent_trajectory!Sequence[tuple[AgentAction, str]]r?   r0   r1   r   r   rC   c                K  r   )  Evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        Nr   r$   r@   rV   r?   r0   r   r   r   r   _evaluate_agent_trajectory{  r   z3AgentTrajectoryEvaluator._evaluate_agent_trajectoryc                  s&   t d| jf||||d|I dH S )  Asynchronously evaluate a trajectory.

        Args:
            prediction (str): The final predicted response.
            agent_trajectory (List[Tuple[AgentAction, str]]):
                The intermediate steps forming the agent trajectory.
            input (str): The input to the agent.
            reference (Optional[str]): The reference answer.

        Returns:
            dict: The evaluation result.
        N)r@   rV   r0   r?   )r   rZ   rY   r   r   r   _aevaluate_agent_trajectory  s   z4AgentTrajectoryEvaluator._aevaluate_agent_trajectoryc                K  s(   | j ||d | jd||||d|S )rX   rH   r@   r?   rV   r0   Nr   )r8   rZ   rY   r   r   r   evaluate_agent_trajectory  s   z2AgentTrajectoryEvaluator.evaluate_agent_trajectoryc                  s0   | j ||d | jd||||d|I dH S )r[   rH   r]   Nr   )r8   r\   rY   r   r   r   aevaluate_agent_trajectory  s   z3AgentTrajectoryEvaluator.aevaluate_agent_trajectoryr9   )r@   r)   rV   rW   r?   r)   r0   r1   r   r   r   rC   )r   r   r   r   r;   r(   r   rZ   r\   r^   r_   r   r   r   r   rU   s  s    %$rU   )#r   
__future__r   loggingabcr   r   collections.abcr   enumr   typingr   r   r	   warningsr
   Zlangchain_core.agentsr   Zlangchain_core.language_modelsr   Zlangchain_core.runnables.configr   Zlangchain.chains.baser   	getLoggerr   loggerr)   r   r   r    r<   rK   rU   r   r   r   r   <module>   s&    
6	3zr