"""LLM Chains for evaluating question answering."""

from __future__ import annotations

import re
import string
from collections.abc import Sequence
from typing import Any, Optional

from langchain_core.callbacks import Callbacks
from langchain_core.language_models import BaseLanguageModel
from langchain_core.prompts import PromptTemplate
from pydantic import ConfigDict
from typing_extensions import override

from langchain.chains.llm import LLMChain
from langchain.evaluation.qa.eval_prompt import CONTEXT_PROMPT, COT_PROMPT, PROMPT
from langchain.evaluation.schema import LLMEvalChain, StringEvaluator
from langchain.schema import RUN_KEY


def _get_score(text: str) -> Optional[tuple[str, int]]:
    match = re.search(r"grade:\s*(correct|incorrect)", text.strip(), re.IGNORECASE)
    if match:
        if match.group(1).upper() == "CORRECT":
            return "CORRECT", 1
        elif match.group(1).upper() == "INCORRECT":
            return "INCORRECT", 0
    try:
        first_word = (
            text.strip().split()[0].translate(str.maketrans("", "", string.punctuation))
        )
        if first_word.upper() == "CORRECT":
            return "CORRECT", 1
        elif first_word.upper() == "INCORRECT":
            return "INCORRECT", 0
        last_word = (
            text.strip()
            .split()[-1]
            .translate(str.maketrans("", "", string.punctuation))
        )
        if last_word.upper() == "CORRECT":
            return "CORRECT", 1
        elif last_word.upper() == "INCORRECT":
            return "INCORRECT", 0
    except IndexError:
        pass
    return None
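

# A minimal illustration of ``_get_score``'s parsing rules (comments only, not
# executed; the expected values follow directly from the implementation above):
#
#     >>> _get_score("GRADE: CORRECT")
#     ('CORRECT', 1)
#     >>> _get_score("The cited date is wrong.\nGRADE: INCORRECT")
#     ('INCORRECT', 0)
#     >>> _get_score("No verdict given.") is None
#     True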


def _parse_string_eval_output(text: str) -> dict:
    """Parse the output text.

    Args:
        text (str): The output text to parse.

    Returns:
        dict: The parsed output, with ``reasoning``, ``value``, and ``score`` keys.
    """
    reasoning = text.strip()
    parsed_scores = _get_score(reasoning)
    if parsed_scores is None:
        value, score = None, None
    else:
        value, score = parsed_scores
    return {
        "reasoning": reasoning,
        "value": value,
        "score": score,
    }


class QAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
    """LLM Chain for evaluating question answering."""

    output_key: str = "results"

    model_config = ConfigDict(
        extra="ignore",
    )

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def evaluation_name(self) -> str:
        return "correctness"

    @property
    def requires_reference(self) -> bool:
        return True

    @property
    def requires_input(self) -> bool:
        return True

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> QAEvalChain:
        """Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables:
            'query', 'answer' and 'result' that will be used as the prompt
            for evaluation.
            Defaults to PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            QAEvalChain: the loaded QA eval chain.
        """
        prompt = prompt or PROMPT
        expected_input_vars = {"query", "answer", "result"}
        if expected_input_vars != set(prompt.input_variables):
            msg = (
                f"Input variables should be {expected_input_vars}, "
                f"but got {prompt.input_variables}"
            )
            raise ValueError(msg)
        return cls(llm=llm, prompt=prompt, **kwargs)

    def evaluate(
        self,
        examples: Sequence[dict],
        predictions: Sequence[dict],
        question_key: str = "query",
        answer_key: str = "answer",
        prediction_key: str = "result",
        *,
        callbacks: Callbacks = None,
    ) -> list[dict]:
        """Evaluate question answering examples and predictions."""
        inputs = [
            {
                "query": example[question_key],
                "answer": example[answer_key],
                "result": predictions[i][prediction_key],
            }
            for i, example in enumerate(examples)
        ]
        return self.apply(inputs, callbacks=callbacks)

    def _prepare_output(self, result: dict) -> dict:
        parsed_result = _parse_string_eval_output(result[self.output_key])
        if RUN_KEY in result:
            parsed_result[RUN_KEY] = result[RUN_KEY]
        return parsed_result

    @override
    def _evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        """Evaluate Chain or LLM output, based on optional input and label.

        Args:
            prediction (str): the LLM or chain prediction to evaluate.
            reference (Optional[str], optional): the reference label
                to evaluate against.
            input (Optional[str], optional): the input to consider during evaluation.
            callbacks (Callbacks, optional): the callbacks to use for tracing.
            include_run_info (bool, optional): whether to include run info in the
                returned results.
            **kwargs: additional keyword arguments, including callbacks, tags, etc.

        Returns:
            dict: The evaluation results containing the score or value.
        """
        result = self(
            {
                "query": input,
                "answer": reference,
                "result": prediction,
            },
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)

    @override
    async def _aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        result = await self.acall(
            inputs={"query": input, "answer": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)


class ContextQAEvalChain(LLMChain, StringEvaluator, LLMEvalChain):
    """LLM Chain for evaluating QA without ground truth, based on context."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def requires_reference(self) -> bool:
        """Whether the chain requires a reference string."""
        return True

    @property
    def requires_input(self) -> bool:
        """Whether the chain requires an input string."""
        return True

    model_config = ConfigDict(
        extra="ignore",
    )

    @classmethod
    def _validate_input_vars(cls, prompt: PromptTemplate) -> None:
        expected_input_vars = {"query", "context", "result"}
        if expected_input_vars != set(prompt.input_variables):
            msg = (
                f"Input variables should be {expected_input_vars}, "
                f"but got {prompt.input_variables}"
            )
            raise ValueError(msg)

    @property
    def evaluation_name(self) -> str:
        return "Contextual Accuracy"

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> ContextQAEvalChain:
        """Load QA Eval Chain from LLM.

        Args:
            llm (BaseLanguageModel): the base language model to use.

            prompt (PromptTemplate): A prompt template containing the input_variables:
            'query', 'context' and 'result' that will be used as the prompt
            for evaluation.
            Defaults to CONTEXT_PROMPT.

            **kwargs: additional keyword arguments.

        Returns:
            ContextQAEvalChain: the loaded QA eval chain.
        """
        prompt = prompt or CONTEXT_PROMPT
        cls._validate_input_vars(prompt)
        return cls(llm=llm, prompt=prompt, **kwargs)

    def evaluate(
        self,
        examples: list[dict],
        predictions: list[dict],
        question_key: str = "query",
        context_key: str = "context",
        prediction_key: str = "result",
        *,
        callbacks: Callbacks = None,
    ) -> list[dict]:
        """Evaluate question answering examples and predictions."""
        inputs = [
            {
                "query": example[question_key],
                "context": example[context_key],
                "result": predictions[i][prediction_key],
            }
            for i, example in enumerate(examples)
        ]
        return self.apply(inputs, callbacks=callbacks)

    def _prepare_output(self, result: dict) -> dict:
        parsed_result = _parse_string_eval_output(result[self.output_key])
        if RUN_KEY in result:
            parsed_result[RUN_KEY] = result[RUN_KEY]
        return parsed_result

    @override
    def _evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        result = self(
            {
                "query": input,
                "context": reference,
                "result": prediction,
            },
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)

    @override
    async def _aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        input: Optional[str] = None,
        callbacks: Callbacks = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        result = await self.acall(
            inputs={"query": input, "context": reference, "result": prediction},
            callbacks=callbacks,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)
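

# A hedged sketch: here the ``reference`` argument carries retrieved context
# rather than a gold answer, so the grader checks whether the prediction is
# supported by that context (``FakeListLLM`` again stands in for a real model):
#
#     >>> from langchain_community.llms.fake import FakeListLLM
#     >>> llm = FakeListLLM(responses=["GRADE: INCORRECT"])
#     >>> grader = ContextQAEvalChain.from_llm(llm)
#     >>> grader.evaluate_strings(
#     ...     input="Who scored?",
#     ...     reference="The match ended 0-0.",
#     ...     prediction="Smith scored twice.",
#     ... )
#     {'reasoning': 'GRADE: INCORRECT', 'value': 'INCORRECT', 'score': 0}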


class CotQAEvalChain(ContextQAEvalChain):
    """LLM Chain for evaluating QA using chain of thought reasoning."""

    @classmethod
    def is_lc_serializable(cls) -> bool:
        return False

    @property
    def evaluation_name(self) -> str:
        return "COT Contextual Accuracy"

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[PromptTemplate] = None,
        **kwargs: Any,
    ) -> CotQAEvalChain:
        """Load QA Eval Chain from LLM."""
        prompt = prompt or COT_PROMPT
        cls._validate_input_vars(prompt)
        return cls(llm=llm, prompt=prompt, **kwargs)