o
    `+ i$                     @  s  d dl mZ d dlZd dlZd dlmZ d dlmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d dlm Z  e!e"Z#d&ddZ$G dd de Z%d'd"d#Z&G d$d% d%eZ'dS )(    )annotationsN)Sequence)AnyOptional)CallbackManagerForChainRun)BaseLanguageModel)	AIMessage)StrOutputParser)BasePromptTemplate)BaseRetriever)Runnable)Field)Chain)PROMPTQUESTION_GENERATOR_PROMPTFinishedOutputParser)LLMChainresponser   returntuple[list[str], list[float]]c                 C  s@   g }g }| j d d D ]}||d  ||d  q||fS )z>Extract tokens and log probabilities from chat model response.logprobscontenttokenZlogprob)Zresponse_metadataappend)r   tokens	log_probsr    r   g/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/langchain/chains/flare/base.py_extract_tokens_and_log_probs   s   r   c                   @  s<   e Zd ZU dZeZded< 	 edddZe	dd	d
Z
dS )QuestionGeneratorChainz4Chain that generates questions from uncertain spans.r
   promptr   boolc                 C  s   dS )NFr   )clsr   r   r   is_lc_serializable.   s   z)QuestionGeneratorChain.is_lc_serializable	list[str]c                 C  s   g dS )Input keys for the chain.
user_inputcontextr   r   selfr   r   r   
input_keys2   s   z!QuestionGeneratorChain.input_keysN)r   r!   r   r$   )__name__
__module____qualname____doc__r   r    __annotations__classmethodr#   propertyr+   r   r   r   r   r   (   s   
 r   r   Sequence[str]r   Sequence[float]min_probfloatmin_token_gapintnum_pad_tokensr$   c                   s  zdd l }|||k d }W n ty0   td dd l  fddt|D }Y nw fdd|D }t|dkrBg S |d |d | d gg}t|dd  D ] \}	}
|
| d }|
||	  |k rp||d d< qW|	|
|g qWfdd|D S )	Nr   a  NumPy not found in the current Python environment. FlareChain will use a pure Python implementation for internal calculations, which may significantly impact performance, especially for large datasets. For optimal speed and efficiency, consider installing NumPy: pip install numpyc                   s"   g | ]\}}  |k r|qS r   )exp).0idxZlog_prob)mathr6   r   r   
<listcomp>M   s
    z)_low_confidence_spans.<locals>.<listcomp>c                   s    g | ]}t d  | r|qS )z\w)research)r<   ir   r   r   r?   R   s        c                   s"   g | ]\}}d   || qS ) )join)r<   startendrC   r   r   r?   \   s   " )
numpywherer;   ImportErrorloggerwarningr>   	enumeratelenr   )r   r   r6   r8   r:   npZ_low_idxZlow_idxspansrB   r=   rI   r   )r>   r6   r   r   _low_confidence_spans8   s,   
rS   c                   @  s   e Zd ZU dZded< 	 ded< 	 eedZded< 	 ded	< 	 d
Zded< 	 dZ	ded< 	 dZ
ded< 	 dZded< 	 dZded< 	 ed9ddZed9ddZd:d$d%Zd;d(d)Z	*d<d=d/d0Ze	1d>d?d7d8Zd*S )@
FlareChainzChain that combines a retriever, a question generator,
    and a response generator.

    See [Active Retrieval Augmented Generation](https://arxiv.org/abs/2305.06983) paper.
    r   question_generator_chainresponse_chain)default_factoryr   output_parserr   	retrieverg?r7   r6      r9   r8      r:   
   max_iterTr!   start_with_retrievalr   r$   c                 C     dgS )r%   r'   r   r)   r   r   r   r+   y      zFlareChain.input_keysc                 C  r_   )zOutput keys for the chain.r   r   r)   r   r   r   output_keys~   r`   zFlareChain.output_keys	questionsr'   strr   _run_managerr   tuple[str, bool]c                 C  s~   |  }g }|D ]}|| j| qddd |D }| j|||dd|i}	t|	tr3|	j}	| j	
|	\}
}|
|fS )Nz

c                 s  s    | ]}|j V  qd S N)Zpage_content)r<   dr   r   r   	<genexpr>   s    z,FlareChain._do_generation.<locals>.<genexpr>r&   	callbacks)	get_childextendrY   invokerG   rV   
isinstancer   r   rX   parse)r*   rb   r'   r   rd   ri   docsquestionr(   resultmarginalfinishedr   r   r   _do_generation   s    
zFlareChain._do_generationlow_confidence_spansinitial_responsec           
        s    fdd|D }|  }tjtr&jj||d}fdd|D }	n
jj|d|id}	|jd|	 dd	d
 |	||S )Nc                   s   g | ]} |d qS ))r'   Zcurrent_responseZuncertain_spanr   )r<   span)rv   r'   r   r   r?      s    z,FlareChain._do_retrieval.<locals>.<listcomp>)ri   c                   s   g | ]
}| j jd   qS )r   )rU   ra   )r<   outputr)   r   r   r?      s    ri   )configzGenerated Questions: yellow
colorrI   )rj   rm   rU   r   applybatchon_textrt   )
r*   ru   rd   r'   r   rv   Zquestion_gen_inputsri   Zquestion_gen_outputsrb   r   )rv   r*   r'   r   _do_retrieval   s,   
zFlareChain._do_retrievalNinputsdict[str, Any]run_manager$Optional[CallbackManagerForChainRun]c                 C  s  |pt  }|| jd  }d}t| jD ]g}|jd| ddd |d|d}t| j|d|	 i\}}	t
||	| j| j| j}
| d	 d| }|
sd|}| j|\}}|rc| jd |i  S q| |
||||\}}| d	 | }|r{ nq| jd |iS )
Nr   rF   zCurrent Response: bluer{   r|   r&   ri    )r   Zget_noop_managerr+   ranger]   r   r   rV   rl   rj   rS   r6   r8   r:   striprG   rX   rn   ra   r   )r*   r   r   rd   r'   r   Z_i_inputr   r   ru   rv   Zfinal_responsers   rr   r   r   r   _call   sT   
zFlareChain._call    llmr   max_generation_lenkwargsr   c           	   
   K  sp   zddl m} W n ty } zd}t||d}~ww ||ddd}t|B }t|B t B }| d||d|S )	aH  Creates a FlareChain from a language model.

        Args:
            llm: Language model to use.
            max_generation_len: Maximum length of the generated response.
            kwargs: Additional arguments to pass to the constructor.

        Returns:
            FlareChain class with the given language model.
        r   )
ChatOpenAIz_OpenAI is required for FlareChain. Please install langchain-openai.pip install langchain-openaiNT)Zmax_completion_tokensr   Ztemperature)rU   rV   r   )Zlangchain_openair   rL   r   r   r	   )	r"   r   r   r   r   emsgrV   Zquestion_gen_chainr   r   r   from_llm   s*   
zFlareChain.from_llmr,   )
rb   r$   r'   rc   r   rc   rd   r   r   re   )ru   r$   rd   r   r'   rc   r   rc   rv   rc   r   re   rf   )r   r   r   r   r   r   )r   )r   r   r   r9   r   r   r   rT   )r-   r.   r/   r0   r1   r   r   rX   r6   r8   r:   r]   r^   r3   r+   ra   rt   r   r   r2   r   r   r   r   r   rT   _   s>   
 

)3rT   )r   r   r   r   )r   r4   r   r5   r6   r7   r8   r9   r:   r9   r   r$   )(
__future__r   loggingr@   collections.abcr   typingr   r   Zlangchain_core.callbacksr   Zlangchain_core.language_modelsr   Zlangchain_core.messagesr   Zlangchain_core.output_parsersr	   Zlangchain_core.promptsr
   Zlangchain_core.retrieversr   Zlangchain_core.runnablesr   Zpydanticr   Zlangchain.chains.baser   Zlangchain.chains.flare.promptsr   r   r   Zlangchain.chains.llmr   	getLoggerr-   rM   r   r   rS   rT   r   r   r   r   <module>   s*    



'