o
    o i\                     @   sb  d dl m Z mZ d dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZmZmZmZmZ d dlZd dlmZmZmZ d dlmZ d dlmZmZ d d	lmZmZ d d
lm Z m!Z! d dl"m#Z# d dl$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/m0Z0 d dl1m2Z2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z= d dl>m?Z?m@Z@mAZA eeBeCf ZDeeCe	f ZEeG dd dZFeG dd dZGG dd deZHG dd deZIG dd de(ZJG dd de/ZKG dd de.ZLG d d! d!e:ZMG d"d# d#e<ZNG d$d% d%e=ZOG d&d' d'e)ZPG d(d) d)e&ZQG d*d+ d+e%ZRG d,d- d-e#ZSG d.d/ d/eSZTd0eeJeLf fd1d2ZUd3eId4eeBef fd5d6ZVd7ee d4eeeeCeeeeCeCf  eeC eeCeeC f f f f fd8d9ZWd:eeC d;eeCeeeeCeCf  eeC eeCeeC f f f d4e
eeCeXeXf ddf fd<d=ZY	>dEd?e?d@eeC dAeCdBeXd4eeC f
dCdDZZdS )F    )copydeepcopy)	dataclass)	lru_cache)AnyDict	FrozenSet	GeneratorIteratorListOptionalSequenceSetTupleUnionN)FSMAlphabetOblivionError)Unsupported)LarkToken)	LexerConf
ParserConf)LexErrorUnexpectedInput)Indenter)
BasicLexerContextualLexer
LexerStateLexerThreadScannerUnexpectedCharactersUnexpectedToken_create_unless)ParsingFrontendPostLexConnector_validate_frontend_args)ActionIntParseTableLALR_Analyzer
ParseTableShift)InteractiveParser)LALR_Parser	ParseConfParserState_Parser)	BetterFSMget_token_transition_keysmake_deterministic_fsmc                   @   s.   e Zd ZU eed< eed< eed< eed< dS )PartialTerminalInfopriorityterminal_namecan_transitionis_finalN)__name__
__module____qualname__int__annotations__strbool r@   r@   `/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/outlines/fsm/parsing.pyr4   ;   s
   
 r4   c                   @   sF   e Zd ZU eedf ed< eed< eedf ed< eedf ed< dS )PartialTokensInfo.fsm_state_seqis_not_finishedterminals_and_infofinal_terminals_and_infoN)r9   r:   r;   r   r<   r=   r?   r4   r@   r@   r@   rA   rB   C   s
   
 rB   c                       s    e Zd ZdZ fddZ  ZS )PartialParserConf)rulesstartparser_typedeterministicuse_value_stackc                    s    t  ||| || _|| _d S N)super__init__rK   rL   )selfrH   	callbacksrI   rK   rL   	__class__r@   rA   rO   T   s   
zPartialParserConf.__init__)r9   r:   r;   __serialize_fields__rO   __classcell__r@   r@   rR   rA   rG   K   s    rG   c                       sR   e Zd ZdZ fddZddeddfdd	ZdddZdd ZddddZ	  Z
S )PartialLark)parserrH   optionsrK   rL   c                    sL   | dd| _| dd| _d|d< t j|fi | | jjdks$J d S )NrK   FrL   Tregexlalr)poprK   rL   rN   rO   rX   rW   )rP   ZgrammarrX   rR   r@   rA   rO   c   s
   zPartialLark.__init__Fdont_ignorereturnPartialBasicLexerc                 C   s,   | j }|rddlm} ||}d|_t|S )Nr   )r   r@   )
lexer_confr   ignorer^   )rP   r\   r_   r   r@   r@   rA   _build_lexerm   s   zPartialLark._build_lexerPartialParsingFrontendc                 C   s   |    t| jj| jj t| j| j| jj| j	| j
}| jj}| jj}| j}t|ts-J t|ts4J ||_|| j_t||| jS rM   )Z_prepare_callbacksr&   rX   rW   lexerrG   rH   
_callbacksrI   rK   rL   r_   
isinstancer   r   rJ   
lexer_typerb   )rP   parser_confrJ   rf   r_   r@   r@   rA   _build_parserw   s"   	zPartialLark._build_parserc                 C   s    d t| j| j| jj| jjS )Nz,{}(open({!r}), parser={!r}, lexer={!r}, ...))formattyper9   source_pathrX   rW   rc   rP   r@   r@   rA   __repr__   s   zPartialLark.__repr__parse_statePartialParseStatec                 C   s   | j j j j||dS )Nis_end)rW   parse_from_state)rP   rn   rq   r@   r@   rA   rr      s   zPartialLark.parse_from_stateF)r]   rb   )rn   ro   )r9   r:   r;   rT   rO   r?   ra   rh   rm   rr   rU   r@   r@   rR   rA   rV   Z   s    


rV   c                   @      e Zd Zdd Zdd ZdS )PartialLexerThreadc                 C   s   t | t| jt| jS rM   )rj   r   rc   staterl   r@   r@   rA   __copy__   s   zPartialLexerThread.__copy__c                 C       t | j d| jd| jdS )N(lexer=z, state=))rj   r9   rc   rv   rl   r@   r@   rA   rm          zPartialLexerThread.__repr__Nr9   r:   r;   rw   rm   r@   r@   r@   rA   ru          ru   c                   @   rt   )PartialPostLexConnectorc                 C   s   t | | jt| jS rM   )rj   rc   r   	postlexerrl   r@   r@   rA   rw      s   z PartialPostLexConnector.__copy__c                 C   rx   )Nry   z, postlexer=rz   )rj   r9   rc   r   rl   r@   r@   rA   rm      s   z PartialPostLexConnector.__repr__Nr|   r@   r@   r@   rA   r~      r}   r~   c                       sR   e Zd Zd fdd	Zdd Zdd Zedd	 Zed
d Zedd Z	  Z
S )rb   Nc                    sx   |j dksJ t|jd< t|jd< t|jd< t|jd< t j||||d |jr1t	| j
j
|j| _
d | _d | _d | _d S )NrZ   r-   r   r   r   )rW   )rJ   PartialLALRParserZ_pluginsr^   PartialContextualLexerru   rN   rO   Zpostlexr~   rc   _termset_fsm_info_symbols_to_states_reverse_shifts)rP   r_   rg   rX   rW   rR   r@   rA   rO      s   



zPartialParsingFrontend.__init__c                 C   s   i | _ i | _| jjj}|j D ]3\}}| D ]*\}}|d tkr5| j |d i }||t 	| | j|t 	||f qqdS )z4Compute state transition and symbols-to-states maps.r      N)
r   r   rW   parse_tablestatesitemsr+   
setdefaultsetadd)rP   r   Z
from_stateZsymbols_to_opssymbolopZsymbols_to_from_statesr@   r@   rA   _compute_maps   s   
z$PartialParsingFrontend._compute_mapsc                    s~   t | }i }i  |j D ]#\}}|j}tdd |jD }|j|jf||<  |t	 
| q fdd| D | _dS )ag  Collect and return information about terminal symbol sets and their FSMs.

        Terminal symbol sets (or "termsets") are ordered sequences of terminal
        symbols that are used by each parser state.  Associated with each is a
        collection of FSMs for each terminal and a single parse state FSM that is
        the union of each terminal's FSM.

        This constructs a list of tuples containing the termset, the set of
        parse states that use the termsets, parse state FSMs, and information
        mapping the components of the parse state FSMs to their terminal symbol
        FSMs.

        c                 s       | ]}|j V  qd S rM   name).0termr@   r@   rA   	<genexpr>       zCPartialParsingFrontend._compute_termset_fsm_info.<locals>.<genexpr>c                    s(   g | ]\}\}}|t  | ||fqS r@   )	frozenset)r   Ztermsetfsmfsms_to_trans_finalsZtermsets_to_parse_statesr@   rA   
<listcomp>  s    

zDPartialParsingFrontend._compute_termset_fsm_info.<locals>.<listcomp>N)get_contextual_lexerlexersr   scannertuple	terminalsr   r   r   r   r   r   )rP   Zcontext_lexerZtermsets_to_fsmsrn   rc   r   keyr@   r   rA   _compute_termset_fsm_info   s   
z0PartialParsingFrontend._compute_termset_fsm_infoc                 C      | j d u r	|   | j S rM   )r   r   rl   r@   r@   rA   termset_fsm_info     
z'PartialParsingFrontend.termset_fsm_infoc                 C   r   rM   )r   r   rl   r@   r@   rA   symbols_to_states  r   z(PartialParsingFrontend.symbols_to_statesc                 C   r   rM   )r   r   rl   r@   r@   rA   reverse_shifts  r   z%PartialParsingFrontend.reverse_shiftsrM   )r9   r:   r;   rO   r   r   propertyr   r   r   rU   r@   r@   rR   rA   rb      s    (!

rb   c                   @   s$   e Zd ZdddZedddZdS )	r   Fc           
         s   t ||js|ndd}|  |j}|| _|j| _|jri   fddt| jj	 dd d}i }|D ]}fdd	| jj| 
 D }	|	||< q4t| j|fd
d	| jj
 D fdd	| jj
 D | _|st| j| _tt| jj	 |	 | _t| j|||jd| _d S )NTdebugc                    s2     | }|d u rtt| dd d}| | < |S )Nc                 S      t | S rM   r>   yr@   r@   rA   <lambda>7      z>PartialLALRParser.__init__.<locals>.to_tuple.<locals>.<lambda>r   )getr   sorted)vnew)
old_to_newr@   rA   to_tuple4  s
   
z,PartialLALRParser.__init__.<locals>.to_tuplec                 S   s   t t| dd dS )Nc                 S   r   rM   r   r   r@   r@   rA   r   =  r   z>PartialLALRParser.__init__.<locals>.<lambda>.<locals>.<lambda>r   )r>   r   xr@   r@   rA   r   =      z,PartialLALRParser.__init__.<locals>.<lambda>r   c                    s6   i | ]\}}||d  t ur|n	|d   |d fqS )r   r   )r+   )r   r   r   r   r@   rA   
<dictcomp>B  s    $z.PartialLALRParser.__init__.<locals>.<dictcomp>c                       i | ]	\}}| |qS r@   r@   r   kr   r   r@   rA   r   J      c                    r   r@   r@   r   r   r@   rA   r   K  r   rL   )r)   rK   Zcompute_lalrrQ   rg   r   _parse_tabler   r   keysr   rj   Zstart_statesZ
end_statesr(   Zfrom_ParseTabledictzipZstates_to_rulesetsPartialParserrL   rW   )
rP   rg   r   strictZanalysisrQ   enum
new_statesstransitionsr@   )r   r   rA   rO   '  sH   

zPartialLALRParser.__init__c                 C   s,   |  | }t|||_t|j|||_|S rM   )__new__r*   deserializer   r   rW   )clsdatamemorQ   r   instr@   r@   rA   r   [  s   
zPartialLALRParser.deserializeNFFrs   )r9   r:   r;   rO   classmethodr   r@   r@   r@   rA   r   &  s    
4r   c                       sh   e Zd ZdZ			d fdd	Zd fdd	Zddd	Zd
d Zdd Zdd Z	dd Z
dd Z  ZS )PartialParserStaterL   NFc                    s   t  j||||d || _d S )N)state_stackvalue_stackrN   rO   rL   )rP   
parse_confrc   r   r   rL   rR   r@   rA   rO   f  s   
zPartialParserState.__init__c           
   	      s  |j dkrl| jd }t| jj| }d}|jjD ]:}|j|jvrPt	
|jd|}t| j}zz| j||d d}W W || _ n tyK   Y W || _qw || _w d}q|sjdd | jj|  D }	t||	| d d	d S | jryt j||d d S | j||d d S )
NpartialF rp   Tc                 S      h | ]}|  r|qS r@   isupperr   r   r@   r@   rA   	<setcomp>  s    z0PartialParserState.feed_token.<locals>.<setcomp>rv   interactive_parser)rj   r   r   rc   r   valuerE   r6   ignore_typesr   new_borrow_posr   feed_token_no_stackr"   r   r   r   rL   rN   
feed_token)
rP   tokenrq   current_stateZcurrent_lexerr7   Zterminal_infoZ
test_tokenstackexpectedrR   r@   rA   r   s  s@   




zPartialParserState.feed_tokenc              	   C   s   | j }| jj}| jj}	 |d }z|| |j \}}W n ty5   dd ||  D }	t||	| ddw ||ks<J |tu rK|rDJ |	| dS |}
t
|
j}|rZ|| d= ||d  |
jj \}}|tu slJ |	| |r{|d |kr{dS q)a  
        This is a copy of `ParserState.feed_token` with all the value stack
        steps removed.  Since we're not exactly parsing in order to obtain a
        CST or anything similar, we can avoid the growing expense of tracking
        the parse tree.
        Tr   c                 S   r   r@   r   r   r@   r@   rA   r         z9PartialParserState.feed_token_no_stack.<locals>.<setcomp>Nr   )r   r   r   	end_staterj   KeyErrorr   r"   r+   appendlenZ	expansionoriginr   )rP   r   rq   r   r   r   rv   actionargr   rulesize_action	new_stater@   r@   rA   r     s8   


z&PartialParserState.feed_token_no_stackc                 C   s   | j jj}|d u r| j ddddd}ntdd|}|d u p.|jdkp.tdd |jj	D }|r:| j
|dd	 d S t|g | d d
)Nz$ENDr   r   r   r   c                 s   r   rM   )r8   )r   tir@   r@   rA   r     r   z.PartialParserState.feed_eof.<locals>.<genexpr>Trp   r   )rc   rv   
last_token_Tokenr   r   rj   anyr   rE   r   r"   )rP   r   Z	eof_tokenZnew_token_is_legalr@   r@   rA   feed_eof  s   
zPartialParserState.feed_eofc                 C   s   | j jj| j S rM   )r   r   r   positionrl   r@   r@   rA   choices     zPartialParserState.choicesc              	   C   st   t  }t| j}i |_|  D ](}| r7t| }||_z||j|d W n	 t	y1   Y qw |
| q|S )z
        Adapted from https://github.com/lark-parser/lark/blob/be542c2ff6d968817df019b8bf03f37b3111c08c/lark/parsers/lalr_interactive_parser.py#L95
        Returns the set of possible tokens that will advance the parser into a new valid state.
        r   )r   r   r   rQ   r   r   r   rc   r   r"   r   )rP   acceptsZconf_no_callbackstr   r@   r@   rA   r     s   

zPartialParserState.acceptsc                 C   s,   t | | jt| jt| jt| j| jdS )Nr   )rj   r   r   rc   r   r   r   rL   rl   r@   r@   rA   rw     s   zPartialParserState.__copy__c                 C   rx   )Nry   z, state_stack=rz   )rj   r9   rc   r   rl   r@   r@   rA   rm     r{   zPartialParserState.__repr__NNFrs   )r9   r:   r;   	__slots__rO   r   r   r   r   r   rw   rm   rU   r@   r@   rR   rA   r   c  s    
.*	r   c                       s4   e Zd Zd	 fdd	Z	d
ddZdddZ  ZS )r   Fc                    s   t  j|||d || _d S )Nr   r   )rP   r   rQ   r   rL   rR   r@   rA   rO     s   
zPartialParser.__init__Nc                 C   sB   t | j| j|}t|t|||| j}|rt| ||jS | |S rM   )	r.   r   rQ   r   r   rL   r,   rc   rr   )rP   rc   rI   r   r   Zstart_interactiver   parser_stater@   r@   rA   parse  s   
zPartialParser.parsec                 C   s   z |}|j |D ]}|| q	|r|r|jdkr|  |W S  tyB } zzt| ||j |_W | ty=   Y |w d }~w t	ym   | j
rltd td td t|jD ]\}}td| | q\td  w )Nr   r   zSTATE STACK DUMPz----------------z%d))rc   lexr   rj   r   r   r,   r   	NameError	Exceptionr   print	enumerater   )rP   rv   r   rq   r   eir   r@   r@   rA   rr     s4   zPartialParser.parse_from_stater   r  )NF)r9   r:   r;   rO   r  rr   rU   r@   r@   rR   rA   r     s
    
r   c                   @   sh   e Zd Zeedd ZdddZdeeedf eedf f fdd	Z	dde
eedf  fddZd
S )PartialScannerc                 C   s2   |j  }t|}t|  \}}||jfS rM   )pattern	to_regexpinteregularparse_patternr3   to_fsmreduceprefix_postfix)r   terminalZ	regex_strr  r   _r@   r@   rA   construct_terminal_fsm0  s   


z%PartialScanner.construct_terminal_fsmFc           
      C   sx   || _ || _|| _|| _dd | j D | _d | _g }| j D ]}| |\}}	|	dks,J || qt|\| _	| _
d S )Nc                 S   s   h | ]}|j qS r@   r   )r   r   r@   r@   rA   r   ?      z*PartialScanner.__init__.<locals>.<setcomp>)r   r   )r   g_regex_flags	use_bytesmatch_wholeallowed_typesZ_mresr  r   	fsm_unionr   r   )
rP   r   r  Zre_r  r  fsmsr   r   r  r@   r@   rA   rO   :  s   
zPartialScanner.__init__r]   .c           
      C   sb   d}d}t t|| jD ] \}\}}}| j| j}t||||}	||	f7 }|r,||	f7 }q||fS )z<Get the possible terminal symbols for an FSM state sequence.r@   )r	  get_sub_fsms_from_seqr   r   r   r4   )
rP   rC   rE   rF   r  fsm_idZfsm_reads_moreZin_finalr6   infor@   r@   rA   get_terminals_infoM  s   


z!PartialScanner.get_terminals_infoNlast_fsm_state_seqc           
      C   s   |}|rt |dksJ |t |d 7 }|d }n| jj}||d }t| jjj| jjj|}t| j||| jd}|s=dS |rG|t	| }	|	S |ft	| }	|	S )zYDetermine an FSM match over `text` starting at `pos` and continuing `last_fsm_state_seq`.r   r   N)
full_match)
r   r   initialr2   Zfsm_infoZalphabet_symbol_mappingZalphabet_anything_valuewalk_fsmr  r   )
rP   textposr"  	start_posstart_stateZ	text_partZtext_transitions	state_seqresr@   r@   rA   match^  s2   
zPartialScanner.matchrs   rM   )r9   r:   r;   r   r   r  rO   r   r4   r!  r   r<   r,  r@   r@   r@   rA   r  /  s    

 r  c                   @   s2   e Zd ZddddZdededee fd	d
ZdS )r   r@   confr   c              	      s   t |j}|j t|}||_i }i | _| D ]E\}}t|}	z||	 }
W n1 tyW   t|t|j	B t|B }t|} fdd|D |_|jsMY qt
|}
|
||	< Y nw |
| j|< q|j|u seJ t
|| _d S )Nc                    s   g | ]
}| v r | qS r@   r@   )r   nterminals_by_namer@   rA   r     s    z3PartialContextualLexer.__init__.<locals>.<listcomp>)listr   r0  r   r   r   r   r   r   r`   r^   
root_lexer)rP   r-  r   Zalways_acceptr   Z	trad_confZlexer_by_symbolsrv   r   r   rc   r_   r@   r/  rA   rO     s0   

zPartialContextualLexer.__init__lexer_stater  r]   c                 c   s    z	 | j |j }|||}|V  q ty   Y d S  tyH   t|j|jjkrEt	|j|jj|jj
|jjd|jo>|jg|| jjdY d S w )NTFallowedZtoken_historyrv   r0  )r   r   
next_tokenEOFErrorr   r   r&  line_ctrchar_posr!   linecolumnr   r2  r   )rP   r3  r  rc   Znext_tokr@   r@   rA   r    s.   zPartialContextualLexer.lexN)r@   r-  r   )	r9   r:   r;   rO   r   r   r
   r   r  r@   r@   r@   rA   r     s    r   c                       sH   e Zd Zd fddZdd Zddd	Zdd
ededefddZ	  Z
S )r^   r-  r   c                    s   t  | |   d S rM   )rN   rO   _build_scanner)rP   r-  rR   r@   rA   rO     s   zPartialBasicLexer.__init__c                 C   s   t | j| j| j| j\}| _| jrJ | j D ]#\}}| j| }|j	jD ]}| j
| | j|}| j|| q%qt| j| j| j| j| _d S rM   )r#   r   r  rer  callbackZuser_callbacksr   r0  r   removeindexinsertr  Z_scanner)rP   r   r6   r?  r  Zsub_terminalidxr@   r@   rA   r=    s   



z PartialBasicLexer._build_scannerNc                 C   s   | j |||S rM   )r   r,  )rP   r&  r'  r"  r@   r@   rA   r,    r   zPartialBasicLexer.match	lex_stater  r]   c              
   C   sl  |j }d }|r|jdkr|jj}|j}|j|rt|d nd }|t|jk r2| |j|j|}|sm|r>|d | j	j
jvr`| j	j| j }|sJdh}t|j|j|j|j||j oZ|j g|| jd|jj}	|jj}
|jj}n
|}	| j	|	\}
}|r}|d n|
d }|j p|jpt|
dk}|j}|t|	 d }|t|jkr|rd}t|	||
|}d}n|j}|j||  }}t| jtsJ || jvrt|||j|j|j}|||| jv  |j|_|j|_ |j|_!|j| jv r| j|j |}t|tst"d| ||_ |S || jv r t|||j|j|j}| j| | |||| jv  d }|t|jk s(t#| )	Nr   r   r   r   z<END-OF-FILE>r4  r   z+Callbacks must return a token (returned %r))$r   rj   r   rC   r8  r9  r   r&  r,  r   r   finalsr  r   r!   r:  r;  r0  rE   rF   r!  r8   r7   rB   r6   re   r?  r   r   feedZnewline_typesend_lineZ
end_columnend_posr   r7  )rP   rD  r  r   r"  r8  rH  r+  r5  rC   rE   rF   Zpriority_terminal_inforD   r(  	type_nameZtoken_valuer   r   t2r@   r@   rA   r6    s   



czPartialBasicLexer.next_tokenr<  rM   )r9   r:   r;   rO   r=  r,  r   r   r   r6  rU   r@   r@   rR   rA   r^     s
    
# r^   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )PartialIndenterzJAn `Indenter` that doesn't reset its state every time `process` is called.c                 C   s
   |  |S rM   )_process)rP   streamr@   r@   rA   processY  s   
zPartialIndenter.processc                 c   s~    |D ]9}|j | jv r|  jd7  _n|j | jv r*|  jd8  _| jdk r*t|g |j | jkr9| |E d H  q|V  qd S )Nr   r   )rj   OPEN_PAREN_typesparen_levelCLOSE_PAREN_typesr"   NL_typeZ	handle_NL)rP   rM  r   r@   r@   rA   rL  \  s   

zPartialIndenter._processc                 C   s    || j v r| jd dk rdS dS )Nr   r   FT)rQ  rP  )rP   
token_typer@   r@   rA   accepts_token_typeq  s   z"PartialIndenter.accepts_token_typec                 C   s"   t |  }| j|_t| j|_|S rM   )rj   rP  r   indent_level)rP   r+  r@   r@   rA   rw   |  s   
zPartialIndenter.__copy__c                 C   rx   )Nz(paren_level=z, indent_level=rz   )rj   r9   rP  rU  rl   r@   r@   rA   rm     r{   zPartialIndenter.__repr__N)	r9   r:   r;   __doc__rN  rL  rT  rw   rm   r@   r@   r@   rA   rK  V  s    rK  c                   @   s,   e Zd ZdZg dZg dZdZdZdZdS )PartialPythonIndenterZ_NEWLINE)LPARLSQBLBRACE)RPARRSQBRBRACEZ_INDENTZ_DEDENT   N)	r9   r:   r;   rR  rO  rQ  ZINDENT_typeZDEDENT_typeZtab_lenr@   r@   r@   rA   rW    s    rW  r   c                 C   s   t | jtr	| jS | jjS rM   )re   rc   r   r   r@   r@   rA   r     s   r   lpr]   c              	   C   s^   i }| j D ]'}t|j }zt|  \}}W n ty&   d}Y nw |||j	< q|S )zRConstruct a ``dict`` mapping terminal symbol names to their finite state machines.N)
r   r  r  r  r  r3   r  r  r   r   )r_  Zsymbol_names_and_fsmsr  r  r   r  r@   r@   rA   terminals_to_fsms  s   
r`  r  c              	      s  t jdd | D  \}tt|  dd  D }dtf fdd}|g}t }i }i }d}|t|k r|| tfd	d
 D rI|| i ||< |j	D ]w}	z||	}
W n	 t
yb   Y qPw z||
}W n ty|   t|}||
 Y nw ||| |	< |
 D ]?\}}||t t i f\}}}| }|}||t | ||t | |||f || | jv r|| qqP|d7 }|t|k s5t|tt|d||dd}t|\}fddt| dd dD }||fS )a  Construct an FSM representing the union of the FSMs in `fsms`.

    This is an updated version of `interegular.fsm.FSM.union` made to return an
    extra map of component FSMs to the sets of state transitions that
    correspond to them in the new FSM.

    c                 S   s   g | ]}|j qS r@   )alphabet)r   r   r@   r@   rA   r     r  zfsm_union.<locals>.<listcomp>c                 S   s   i | ]\}}||j qS r@   )r$  )r   r  r   r@   r@   rA   r     r   zfsm_union.<locals>.<dictcomp>new_transitionc                    sh   i } D ])\}}| | }|| v r-| | |j v r-||j | |  v r-|j | |  | ||< q|s2t|S rM   )mapr   )r   rb  nextr  fZold_transition)indexed_fsms
new_to_oldr@   rA   follow  s   zfsm_union.<locals>.followr   c                 3   s&    | ]\}}  |d |jv V  qdS )r   N)r   rE  )r   jr   )rv   r@   rA   r     s   $ zfsm_union.<locals>.<genexpr>r   T)ra  r   r$  rE  rc  Z__no_validation__c                    sP   i | ]$\}\}}}| fd d|D  fdd|D  fdd|  D fqS )c                    s    h | ]\}} |  | fqS r@   r@   )r   s1s2old_to_new_statesr@   rA   r   	  s     z'fsm_union.<locals>.<dictcomp>.<setcomp>c                       h | ]} | qS r@   r@   r   rl  r@   rA   r   
  r   c                    s$   i | ]\}}| fd d|D qS )c                    rn  r@   r@   )r   r   rl  r@   rA   r     r   z2fsm_union.<locals>.<dictcomp>.<dictcomp>.<setcomp>r@   )r   	old_stater   rl  r@   rA   r     s    z(fsm_union.<locals>.<dictcomp>.<dictcomp>)r   )r   r  r   rE  r   rl  r@   rA   r     s    	
c                 S   s   | d S )Nr   r@   r   r@   r@   rA   r     r   zfsm_union.<locals>.<lambda>r   )r   unionr   r	  r<   r   r   r   r   Zby_transitionr   rA  
ValueErrorr   r   r   rE  r   ranger3   r   )r  ra  r$  rh  r   rE  rc  r   r  Z
transitionrd  ri  r  Z	fsm_statefsm_transitions
fsm_finalsZfsm_old_to_newZold_fromZold_tor   Z_fsms_to_trans_finalsr@   )rf  rg  rm  rv   rA   r    s|   


*
	
	r  r*  r   c                 #   sL    t tdd dd d   fdd| D E dH  dS )a  Get the indices of the sub-FSMs in `fsm` that could have matched the state sequence `state_seq`.

    Parameters
    ----------
    state_seq
        A state sequence.
    fsms_to_trans_finals
        A map from FSM indices to tuples containing sets of their state transitions
        and sets of the final/accept states.

    Returns
    -------
    A generator returning tuples containing each sub-FSM index (in the order
    they were union-ed to construct `fsm`) and booleans indicating whether or
    not there is another valid transition from the last state in the sequence
    for the associated sub-FSM (i.e. if the FSM can continue
    accepting/matching) and whether or not the sequence ends in a final state
    of the sub-FSM.
    Nr   r   c                 3   sH    | ]\}\}}} |r|t fd d|D d |v fV  qdS )c                 3   s    | ]	\}} |kV  qd S rM   r@   )r   Zfrom_sZto_s)last_fsm_stater@   rA   r   ;  s    z2get_sub_fsms_from_seq.<locals>.<genexpr>.<genexpr>r   N)issubsetr   )r   Zfsm_idxr   rE  r  ru  r*  Zstate_seq_transitionsr@   rA   r   6  s    	

z(get_sub_fsms_from_seq.<locals>.<genexpr>)r   r   r   )r*  r   r@   rw  rA   r    s   	r  Tr   token_transition_keysr)  r#  c                 C   s   | j }|}g }d}| j}t|D ]0\}	}
|||
f}|d u r1|s-|dkr-|d |   S g   S |}||v r;|	d }|| q|rK|d |	krKg S |S )Nr   r   )rE  Zflat_transition_mapr	  r   r   )r   rx  r)  r#  rt  rv   Zaccepted_statesZlast_final_idxrs  r  Z	trans_keyr   r@   r@   rA   r%  D  s$   r%  )T)[r   r   dataclassesr   	functoolsr   typingr   r   r   r	   r
   r   r   r   r   r   r   r  Zinteregular.fsmr   r   r   Zinteregular.patternsr   Zlarkr   r   Zlark.commonr   r   Zlark.exceptionsr   r   Zlark.indenterr   Z
lark.lexerr   r   r   r   r    r!   r"   r#   Zlark.parser_frontendsr$   r%   r&   Zlark.parsers.lalr_analysisr'   r(   r)   r*   r+   Z$lark.parsers.lalr_interactive_parserr,   Zlark.parsers.lalr_parserr-   r.   r/   r0   Zoutlines_core.fsm.regexr1   r2   r3   r>   r<   ro   ZParseStateTyper4   rB   rG   rV   ru   r~   rb   r   r   r   r  r   r^   rK  rW  r   r`  r  r?   r  r%  r@   r@   r@   rA   <module>   s    4(
?
{= !,U4 0	8
t,
-