o
    o i"                     @   s$  d dl Z d dlZd dlZd dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ er^d dlmZ eeef ZG dd deZG dd deZdd Z G dd deZe !dddgZ"G dd deZ#dS )    N)TYPE_CHECKINGAny	GeneratorUnion)DedentError)UnexpectedCharactersUnexpectedToken)Generate)Guide)
RegexGuide)Write)create_states_mapping)grammars)PartialLarkPartialParserState)	Tokenizerc                   @   s   e Zd ZU dZeed< dS )r
   ak  Base definition of a generation guide.

    A generation guide defines the behavior of a finite-state machine that guides
    a text generation procedure. Unlike the DFAs built from regular expressions
    guides can also emit a `Write` instructions which tells the model that it can
    append a sequence of tokens (or token word) instead of generating it.

    initial_stateN)__name__
__module____qualname____doc__r   __annotations__ r   r   ^/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/outlines/fsm/guide.pyr
      s   
 	r
   c                   @   sd   e Zd ZdZdZdZdZdddZded	e	fd
dZ
deded	efddZdefddZdd ZdS )StopAtEOSGuidez@Guide to generate tokens until the EOS token has been generated.   r   	tokenizerr   c                 C   s   |j | _ |j | _dS )zzInitialize the generation guide.

        model
            The logit generator used to generate the next token.

        N)eos_token_id
vocabularyvalues)selfr   r   r   r   __init__/   s   zStopAtEOSGuide.__init__statereturnc                 C   s   |  |rt| jgS td S N)is_final_stater   r   r	   r    r"   r   r   r   get_next_instruction9   s   
z#StopAtEOSGuide.get_next_instructiontoken_idc                 C   s    || j ks
|| jkr| jS | jS r$   )r   final_stater   )r    r"   r(   r   r   r   get_next_state>   s   zStopAtEOSGuide.get_next_statec                 C   s
   || j kS r$   )r)   r&   r   r   r   r%   D   s   
zStopAtEOSGuide.is_final_statec                 C   s   | S r$   r   r    r   r   r   copyG   s   zStopAtEOSGuide.copyN)r   r   )r   r   r   r   r)   Zstart_stater   r!   intInstructionr'   r*   r%   r,   r   r   r   r   r   (   s    

r   c                 O   s   t | |g|R i |S r$   )uncached_create_states_mapping)regex_stringr   argskwargsr   r   r   cached_create_states_mappingK   s   r3   c                       s*   e Zd ZdZedef fddZ  ZS )r   zp
    Guide to generate text in the language of a regular expression.
    CoreRegexGuide with outlines cache
    r0   c                    s   t  j||fdti|S )NZ_create_states_mapping)super
from_regexr3   )clsr0   r   r2   	__class__r   r   r5   U   s   zRegexGuide.from_regex)r   r   r   r   classmethodstrr5   __classcell__r   r   r7   r   r   O   s    r   CFGStateparser_state
prev_tokenc                	   @   s   e Zd ZdZdefddZdedefddZded	e	de
ed
d
f fddZdededefddZdededefddZdedefddZdedefddZdedefddZdddZd
S )CFGGuidezNGuide to generate text that is in the language of a context-free Lark grammar.
cfg_stringc                 C   sN   t d || _|| _| jj| _t|dtjgd| _t	| j
ddd| _dS )zg
        Construct the PartialLark parser and set the empty initial_state (PartialParserState)
        zOutlines' public *community-contributed* CFG structured generation is experimental. Please review https://dottxt-ai.github.io/outlines/latest/reference/generation/cfg#disclaimerZlalr)parserZimport_paths Nr=   r>   )warningswarnr@   r   r   r   r   ZGRAMMAR_PATHrA   r<   parser   )r    r@   r   r   r   r   r!   j   s   
zCFGGuide.__init__r"   r#   c                 C   s\   |j du rtt| jgS t| || jj	 }t
|dkr'tt|S tt|S )aW  Return the next instruction for guided generation.

        Current lazy approach:
        - For each token in the vocabulary
          - create a copy of the parsers state
          - add the tokens to the parsers input text
          - if valid, add token to returned tokens

        Further refinements are necessary for performant text processing.

        Parameters
        ----------
        state
            The guides current PartialParserState, or None if complete

        Returns
        -------
        A `Generate` instance that contains the model and the allowed token ids.

        Nr   )r=   r   torchZtensorr   listiter_valid_token_idsr   r   r   lenr	   )r    r"   Zvalid_tokensr   r   r   r'      s   
zCFGGuide.get_next_instructioncandidate_token_idsNc                 c   sx    |j du r| jV  dS |D ]+}|| jkr| |r|V  qz| |t| |V  W q ttttt	fy9   Y qw dS )a  
        Iterate over the given token_ids and yield those that are valid for the current parser state.

        Parameters
        ----------
        parser_state
            The current state of the parser, or None if complete.
        token_ids
            The list of token ids to check for validity.

        Yields
        ------
        int
            Valid token ids.
        N)
r=   r   can_terminate_state_get_parser_state_token_appliedr-   
ValueErrorEOFErrorr   r   r   )r    r"   rK   r(   r   r   r   rI      s,   




zCFGGuide.iter_valid_token_idsr(   c                 C   s6   |j du s
|| jkrd}n| |t|}t||dS )a  
        Update the state of the guide.
        Decode the token_id, and calculate the new parser_state with the token applied.

        Parameters
        ----------
        state
            The guides current PartialParserState, or None if complete
        token_id
            The id of the token that was just generated.

        Returns
        -------
        The guides new PartialParserState

        NrC   )r=   r   rM   r-   r<   )r    r"   r(   r=   r   r   r   r*      s   zCFGGuide.get_next_statec                 C   s   t  |j}|jdu r| j|gd }n| j|jggd }| j|j|ggd }|t|d }|dkr<td|jj j	|7  _	| j
j|dd |S )a(  
        Don't mutate `parser_state`, copy to protect

        Get the token string
          - if first token in generation: tokenizer.decode (no leading whitespace)
          - else: normalized (with possibly leading whitespace)

        Don't allow empty ("") tokens, raise ValueError
        Nr   rB   zempty next tokenF)Zis_end)r,   r=   r>   r   decoderJ   rN   lexerr"   textrA   Zparse_from_state)r    r"   r(   r=   Znew_token_strZprev_token_strZcombined_token_strr   r   r   rM      s   
z(CFGGuide._get_parser_state_token_appliedc                 C   s
   |  |S r$   )rL   r&   r   r   r   r%      s   
zCFGGuide.is_final_statec                 C   s:   |j durzt|j   W dS  ty   Y dS w dS )z"Generation is allowed to terminateNFT)r=   r,   feed_eofr   r&   r   r   r   rL     s   
zCFGGuide.can_terminate_statec                 C   s    |j du pt|j  dhS )z1Generation must terminate, no legal continuationsNz$END)r=   setZacceptsissubsetr&   r   r   r   must_terminate_state  s   zCFGGuide.must_terminate_statec                 C   s   t | j| jS )zCreate a copy of the Guide.)r?   r@   r   r+   r   r   r   r,     s   zCFGGuide.copy)r#   r?   )r   r   r   r   r:   r!   r<   r.   r'   rH   r   r-   rI   r*   r   rM   boolr%   rL   rV   r,   r   r   r   r   r?   g   s.     
'
!	r?   )$collectionsr,   rD   typingr   r   r   r   rG   Zlark.indenterr   Z
lark.lexerr   r   Zoutlines_core.fsm.guider	   r
   Z	CoreGuider   ZCoreRegexGuider   r   r/   Zoutlinesr   Zoutlines.fsm.parsingr   r   Zoutlines.models.tokenizerr   r.   r   r3   
namedtupler<   r?   r   r   r   r   <module>   s.    #