o
    )i!                     @   s   d dl Z d dlZd dlmZmZ d dlmZmZmZm	Z	m
Z
 ddlmZmZmZmZmZmZmZmZmZ dedefdd	Zd
edefddZeG dd dZG dd de jZG dd deZG dd deZG dd deZG dd deZdS )    N)	dataclassfield)AnyHashableListOptionalTypeVar   )	COMPLETE_ALPHABETCONFIG_ENV_VAR_DEFAULT_ALPHABETWHITESPACE_CHARACTERS#DEFAULT_MAX_CONSECUTIVE_WHITESPACESDEFAULT_FORCE_JSON_FIELD_ORDER*CONFIG_ENV_VAR_MAX_CONSECUTIVE_WHITESPACES&CONFIG_ENV_VAR_STRICT_JSON_FIELD_ORDER$CONFIG_ENV_VAR_MAX_JSON_ARRAY_LENGTHDEFAULT_MAX_JSON_ARRAY_LENGTHsreturnc                 C   s   | o	|    dv S )N)true1)striplower)r    r   q/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/lmformatenforcer/characterlevelparser.py_parse_bool      r   env_varc                    s2   t  }|tkr
tn| fdd}t|dS )Nc                      s   t jt S N)osenvirongetstrr   default_valr   Zparser_funcr   r   factory_func   s   z+_env_or_default_field.<locals>.factory_func)default_factory)typeboolr   r   )r   r$   Zdefault_val_typer%   r   r#   r   _env_or_default_field   s   
r)   c                   @   sZ   e Zd ZU eeeZeed< ee	e
Zeed< 	 eeeZeed< 	 eeeZeed< dS )CharacterLevelParserConfigalphabetmax_consecutive_whitespacesforce_json_field_ordermax_json_array_lengthN)__name__
__module____qualname__r)   r   r
   r+   r"   __annotations__r   r   r,   intr   r   r-   r(   r   r   r.   r   r   r   r   r*      s    
 r*   c                   @   s   e Zd ZdZddee fddZejde	dd fdd	Z
ejde	fd
dZejdefddZdee fddZdee fddZedefddZejdefddZdS )CharacterLevelParserzCharacterLevelParser is an interface for classes that can parse strings one character at a time, and determine which characters are allowed at any specific timeNconfigc                 C   s   |pt  | _d S r   )r*   _config)selfr5   r   r   r   __init__+   s   zCharacterLevelParser.__init__new_characterr   c                 C      t  )zAdd a character to the parser, and return a new parser that represents the state of the parser after the character has been added. This has to be
        an immutable operation - the original CharacterLevelParser (self) must not be modified.NotImplementedErrorr7   r9   r   r   r   add_character.   s   z"CharacterLevelParser.add_characterc                 C   r:   )zgReturn a string containing all characters that are allowed at the current point in the parsing process.r;   r7   r   r   r   get_allowed_characters4      z+CharacterLevelParser.get_allowed_charactersc                 C   r:   )zReturn True if the parser is in a state where it can end (potentially finished parsing the desired structure), and False otherwise.r;   r?   r   r   r   can_end9   rA   zCharacterLevelParser.can_endc                 C      dS )zpOptional. Return a key that denotes that this state is a repeating state, full tree traversal should be avoided.Nr   r?   r   r   r   shortcut_key>      z!CharacterLevelParser.shortcut_keyc                 C   rC   )z|Optional. Return a key that denotes that this state is a repeating state, and if it is visited again, results can be cached.Nr   r?   r   r   r   	cache_keyB   rE   zCharacterLevelParser.cache_keyc                 C   s   | j S r   r6   r?   r   r   r   r5   F   s   zCharacterLevelParser.config
new_configc                 C   s
   || _ | S r   rG   )r7   rH   r   r   r   r5   J   s   r   )r/   r0   r1   __doc__r   r*   r8   abcabstractmethodr"   r>   r@   r(   rB   r   rD   rF   propertyr5   setterr   r   r   r   r4   (   s    r4   c                   @   sL   e Zd ZdZdefddZdedefddZdefd	d
Zde	fddZ
dS )StringParsera  RegexParser is an example CharacterLevelParser that only allows an exact string. It is a debugging / learning tool
    to show how CharacterLevelParser works together with TokenizerPrefixTree to filter the allowed tokens (some of whom may contain multiple characters)stringc                 C   
   || _ d S r   
target_str)r7   rO   r   r   r   r8   S      
zStringParser.__init__r9   r   c                 C   s>   | j |rt| j t|d  S td| j d  d| d)Nz
Expected 'r   z' but got '')rR   
startswithrN   len
ValueErrorr=   r   r   r   r>   V   s   zStringParser.add_characterc                 C   s   | j r| j d S dS )Nr    rQ   r?   r   r   r   r@   \   r   z#StringParser.get_allowed_charactersc                 C   s   | j  S r   rQ   r?   r   r   r   rB   _   s   zStringParser.can_endN)r/   r0   r1   rI   r"   r8   r4   r>   r@   r(   rB   r   r   r   r   rN   P   s    rN   c                   @   sN   e Zd ZdZddefddZdedefdd	Zdefd
dZ	defddZ
dS )ForceStopParserzbA simple parser that forbids any characters except the stop token. Used to force stop LM operationFallow_whitespacec                 C   rP   r   )rZ   )r7   rZ   r   r   r   r8   e   rS   zForceStopParser.__init__r9   r   c                 C   s   | S r   r   r=   r   r   r   r>   g      zForceStopParser.add_characterc                 C   s   | j rtS dS NrX   )rZ   r   r?   r   r   r   r@   i   s   z&ForceStopParser.get_allowed_charactersc                 C   rC   )NTr   r?   r   r   r   rB   k   r[   zForceStopParser.can_endN)F)r/   r0   r1   rI   r(   r8   r"   r4   r>   r@   rB   r   r   r   r   rY   c   s    rY   c                   @   st   e Zd ZdZdee fddZdedefddZdefd	d
Z	de
fddZdee fddZdee fddZdS )UnionParserzWA parser that allows a string that would be allowed by any of several different parsersparsersc                 C   rP   r   r^   r7   r^   r   r   r   r8   q   rS   zUnionParser.__init__r9   r   c                    sB    fdd| j D } fdd|D }t|dkr|d S t|S )Nc                    s   g | ]
} |  v r|qS r   r@   .0parserr9   r   r   
<listcomp>v   s    z-UnionParser.add_character.<locals>.<listcomp>c                    s   g | ]}|  qS r   )r>   rb   re   r   r   rf   w   s    r	   r   )r^   rV   r]   )r7   r9   Zrelevant_parsersnext_parsersr   re   r   r>   t   s
   zUnionParser.add_characterc                 C   s$   d dd | jD }d t|S )NrX   c                 S      g | ]}|  qS r   ra   rb   r   r   r   rf   }       z6UnionParser.get_allowed_characters.<locals>.<listcomp>)joinr^   set)r7   allowedr   r   r   r@   |   s   z"UnionParser.get_allowed_charactersc                 C      t dd | jD S )Nc                 S   rh   r   rB   rb   r   r   r   rf      ri   z'UnionParser.can_end.<locals>.<listcomp>)anyr^   r?   r   r   r   rB      r   zUnionParser.can_endc                 C   s0   t dd | jD }t|dkrtt|S d S )Nc                 s       | ]}|  V  qd S r   )rD   rb   r   r   r   	<genexpr>       z+UnionParser.shortcut_key.<locals>.<genexpr>r	   )rk   r^   rV   nextiter)r7   Zunique_shortcut_keysr   r   r   rD      s   zUnionParser.shortcut_keyc                 C   2   t dd | jD }tdd |D rd|fS d S )Nc                 s   rp   r   rF   rb   r   r   r   rq      rr   z(UnionParser.cache_key.<locals>.<genexpr>c                 s       | ]}|d uV  qd S r   r   rc   keyr   r   r   rq      rr   uniontupler^   allr7   Zall_cache_keysr   r   r   rF         zUnionParser.cache_keyN)r/   r0   r1   rI   r   r4   r8   r"   r>   r@   r(   rB   r   r   rD   rF   r   r   r   r   r]   o   s    r]   c                   @   st   e Zd ZdZdee fddZdedefddZdefd	d
Z	de
fddZdee fddZdee fddZdS )SequenceParserz0A parser that is a sequence of multiple parsers.r^   c                 C   rP   r   r_   r`   r   r   r   r8      rS   zSequenceParser.__init__r9   r   c                 C   s   g }t | jD ]6\}}|| v r7||}|g| j|d d   }t|dkr0||d  n|t| | s= nqt|dkrH|d S t|S Nr	   r   )		enumerater^   r@   r>   rV   appendr   rB   r]   )r7   r9   Zlegal_parsersidxrd   Zupdated_parserrg   r   r   r   r>      s   
zSequenceParser.add_characterc                 C   s6   t  }| jD ]}||  | s nqd|S r\   )rk   r^   updater@   rB   rj   )r7   Zallowed_charactersrd   r   r   r   r@      s   

z%SequenceParser.get_allowed_charactersc                 C   rm   )Nc                 S   rh   r   rn   rb   r   r   r   rf      ri   z*SequenceParser.can_end.<locals>.<listcomp>)r}   r^   r?   r   r   r   rB      r   zSequenceParser.can_endc                 C   s    t | jdkr| jd  S d S r   )rV   r^   rD   r?   r   r   r   rD      s    zSequenceParser.shortcut_keyc                 C   ru   )Nc                 s   rp   r   rv   rb   r   r   r   rq      rr   z+SequenceParser.cache_key.<locals>.<genexpr>c                 s   rw   r   r   rx   r   r   r   rq      rr   sequencer{   r~   r   r   r   rF      r   zSequenceParser.cache_keyN)r/   r0   r1   rI   r   r4   r8   r"   r>   r@   r(   rB   r   rD   r   rF   r   r   r   r   r      s    r   ) rJ   r   dataclassesr   r   typingr   r   r   r   r   Zconstsr
   r   r   r   r   r   r   r   r   r"   r(   r   r)   r*   ABCr4   rN   rY   r]   r   r   r   r   r   <module>   s    ,(!