o
    0 i@                     @   sx  d dl mZ d dlmZmZmZmZ d dlmZm	Z	 d dl
Z
eG dd dZG dd deZG d	d
 d
eZeG dd deZeG dd deZeG dd deZeG dd deZeG dd deZeG dd deZeG dd deZG dd dZdeeef ddfddZdedefd d!Zdedefd"d#Zdedefd$d%Zd)d&d'Zed(kre  dS dS )*    )	dataclass)OptionalListTupleIterator)ABCabstractmethodNc                   @   sf   e Zd ZU dZeed< eed< ddedd fddZdefd	d
ZddedefddZ	defddZ
dS )Positionz2Tracks position in source text for error reportingtextpos   nreturnc                 C   s   t | j| j| S N)r	   r
   r   selfr    r   c/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/llguidance/gbnf_to_lark.pyadvance      zPosition.advancec                 C   s    | j t| jk r| j| j  S dS N )r   lenr
   r   r   r   r   current   s    zPosition.currentc                 C   s   | j | j| j|  S r   )r
   r   r   r   r   r   peek      zPosition.peekc                 C   sf   | j dd| jd }| j td| jd | j }| j | j| jd  }d| dt| dt| S )N
r   r      zline z, z ^ )r
   countr   maxrepr)r   line_noZprefsuffr   r   r   __str__   s   zPosition.__str__N)r   )__name__
__module____qualname____doc__str__annotations__intr   r   r   r$   r   r   r   r   r	   	   s   
 r	   c                       s&   e Zd Zdedef fddZ  ZS )GbnfToLarkErrorr   messagec                    s    || _ t | d|  d S )Nz at )r   super__init__)r   r   r-   	__class__r   r   r/   !   s   zGbnfToLarkError.__init__)r%   r&   r'   r	   r)   r/   __classcell__r   r   r0   r   r,       s    r,   c                   @   sd   e Zd ZedefddZdefddZdefddZdefdd	Z	dd
dZ
ded  fddZdS )ASTNoder   c                 C   s   d S r   r   r   r   r   r   r$   (   s   zASTNode.__str__c                 C      dS )NTr   r   r   r   r   	is_atomic,      zASTNode.is_atomicc                 C   s   t dd |  D S )Nc                 s   s    | ]}|  V  qd S r   )is_terminal).0cr   r   r   	<genexpr>0       z&ASTNode.is_terminal.<locals>.<genexpr>)allchildrenr   r   r   r   r7   /   r   zASTNode.is_terminalc                 C   s   |   S r   )r$   r   r   r   r   top_str2      zASTNode.top_strc                 C   s   | S r   r   r   r   r   r   simplify5   r6   zASTNode.simplifyc                 C   s   g S r   r   r   r   r   r   r=   8   r6   zASTNode.childrenN)r   r3   )r%   r&   r'   r   r)   r$   boolr5   r7   r>   r@   listr=   r   r   r   r   r3   '   s    
r3   c                   @   $   e Zd ZU eed< defddZdS )LiteralNodevaluer   c                 C      d| j  dS )N")rE   r   r   r   r   r$   @      zLiteralNode.__str__Nr%   r&   r'   r)   r*   r$   r   r   r   r   rD   <      
 rD   c                   @   rC   )	RegexNoderxr   c                 C   rF   )N/)rL   r   r   r   r   r$   H   rH   zRegexNode.__str__NrI   r   r   r   r   rK   D   rJ   rK   c                   @   sB   e Zd ZU eed< dZed ed< defddZdefdd	Z	dS )
RuleRefNodenameNRuleNodetargetr   c                 C   s   | j d u rdS | j jS NF)rQ   rule_is_terminalr   r   r   r   r7   Q   s   
zRuleRefNode.is_terminalc                 C   s   | j d u r| jS | j jS r   )rQ   rO   r   r   r   r   r$   V   s   
zRuleRefNode.__str__)
r%   r&   r'   r)   r*   rQ   r   rA   r7   r$   r   r   r   r   rN   L   s
   
 rN   c                   @   sX   e Zd ZU eed< eed< ee ed< dee fddZdefddZ	de
fd	d
ZdS )RepetitionNodenode	min_times	max_timesr   c                 C      | j gS r   )rU   r   r   r   r   r=   b   r?   zRepetitionNode.childrenc                 C   s   | j  | _ | S r   )rU   r@   r   r   r   r   r@   e   s   zRepetitionNode.simplifyc                 C   s   t | j}| j sd| d}| jdkr| jd u r| dS | jdkr.| jd u r.| dS | jdkr=| jdkr=| dS | jd urGt | jnd}| d	| j d
| dS )N()r   *r   +?r   {,})r)   rU   r5   rV   rW   )r   innerZmax_strr   r   r   r$   i   s   




zRepetitionNode.__str__N)r%   r&   r'   r3   r*   r+   r   r   r=   r@   r)   r$   r   r   r   r   rT   \   s   
 rT   c                   @   sV   e Zd ZU ee ed< defddZdefddZ	defddZ
dee fd	d
ZdS )SequenceNodenodesr   c                 C   s    | j sdS ddd | j D S )Nz"" c                 s       | ]}t |V  qd S r   r)   )r8   rU   r   r   r   r:   ~   r;   z'SequenceNode.__str__.<locals>.<genexpr>)rc   joinr   r   r   r   r$   {   s   zSequenceNode.__str__c                 C   r4   rR   r   r   r   r   r   r5      r6   zSequenceNode.is_atomicc                 C   D   t t| jD ]}| j|  | j|< qt| jdkr | jd S | S Nr   r   )ranger   rc   r@   r   ir   r   r   r@      
   
zSequenceNode.simplifyc                 C      | j S r   )rc   r   r   r   r   r=         zSequenceNode.childrenN)r%   r&   r'   r   r3   r*   r)   r$   rA   r5   r@   rB   r=   r   r   r   r   rb   w   s   
 rb   c                   @   sd   e Zd ZU ee ed< defddZdefddZde	fddZ
defd	d
Zdee fddZdS )AlternativeNodealternativesr   c                 C   s   d dd | jD S )Nz
     | c                 s   re   r   rf   r8   Zaltr   r   r   r:      r;   z*AlternativeNode.top_str.<locals>.<genexpr>rg   rq   r   r   r   r   r>      r   zAlternativeNode.top_strc                 C   s   dd dd | jD  d S )NrY   z | c                 s   re   r   rf   rr   r   r   r   r:      r;   z*AlternativeNode.__str__.<locals>.<genexpr>rZ   rs   r   r   r   r   r$      s   zAlternativeNode.__str__c                 C   r4   rR   r   r   r   r   r   r5      r6   zAlternativeNode.is_atomicc                 C   rh   ri   )rj   r   rq   r@   rk   r   r   r   r@      rm   zAlternativeNode.simplifyc                 C   rn   r   rq   r   r   r   r   r=      ro   zAlternativeNode.childrenN)r%   r&   r'   r   r3   r*   r)   r>   r$   rA   r5   r@   rB   r=   r   r   r   r   rp      s   
 rp   c                   @   sV   e Zd ZU eed< eed< eed< dZeed< dZde	e fdd	Z
defd
dZdS )rP   rO   rq   commentFrS   r   r   c                 C   rX   r   rt   r   r   r   r   r=      r?   zRuleNode.childrenc                 C   s   | j  | j d| j  S )Nz: )ru   rO   rq   r>   r   r   r   r   r$      s   zRuleNode.__str__N)r%   r&   r'   r)   r*   r3   rS   rA   orderr   r=   r$   r   r   r   r   rP      s   
 rP   c                   @   s  e Zd Zd'ddZdedeeef fddZdede	eef fd	d
Z
dede	eef fddZdede	eef fddZedede	eef fddZedede	eef fddZdededefddZededefddZededefddZdede	eef fddZdedede	eef fddZdedede	eef fd d!Zdedede	eef fd"d#Zded$ee defd%d&ZdS )(GrammarParserr   Nc                 C   s
   d| _ d S r   )curr_commentr   r   r   r   r/      s   zGrammarParser.__init__r
   c                 C   s`   t |d}| j|dd}g }| r)| |\}}|| | j|dd}| sdd |D S )Nr   Tallow_newlinesc                 S   s   i | ]}|j |qS r   )rO   )r8   ruler   r   r   
<dictcomp>   s    z'GrammarParser.parse.<locals>.<dictcomp>)r	   _skip_spacer   _parse_ruleappend)r   r
   r   rulesr{   r   r   r   parse   s   

zGrammarParser.parser   c                 C   s  dt dtfdd}| dkr|dd st|d| }| }|d	v r/d| | fS |d
krZ|ddd }t|dksF||sNt|d| |d}d| |fS |dkr|ddd }t|dksq||syt|d| |d}d|d |fS |dkr|ddd }t|dks||st|d| |d}d|d |fS t|d| | dkrt|d| | fS )Nsr   c                 S   s   t dd | D S )Nc                 s   s    | ]}|d v V  qdS )Z0123456789abcdefABCDEFNr   )r8   chr   r   r   r:      r;   z@GrammarParser._parse_char.<locals>.is_all_hex.<locals>.<genexpr>)r<   )r   r   r   r   
is_all_hex   r   z-GrammarParser._parse_char.<locals>.is_all_hex\   r   zIncomplete escape sequencez"\[]nrtx   zInvalid \x escape sequence: \xz\xu      zInvalid \u escape sequence: \uz\u0U	      zInvalid \U escape sequence: \Uz\UzInvalid escape sequence \r   zUnexpected end of input)r)   rA   r   r   r,   r   r   lstrip)r   r   r   r9   Z	hex_valuer   r   r   _parse_char   sH   







zGrammarParser._parse_charc                 C   sh   |  dkrt|dd}| }	 | |\}}|dv r$|d| 7 }n||7 }|dkr-nqt||fS )N[zExpected '['Tz/[r   ])r   r,   r   r   rK   r   r   rr9   r   r   r   _parse_char_class   s   
	zGrammarParser._parse_char_classc                 C   sR   |  dkrt|d| }d}	 | |\}}|dkrn||7 }qt||fS )NrG   zExpected '"'r   )r   r,   r   r   rD   r   r   r   r   _parse_literal  s   
zGrammarParser._parse_literalc                 C   sR   | j }t|  r|  } t|  s
| j |krt| d| j|| j  | fS )NzExpected name)r   rw   _is_word_charr   r   r,   r
   r   startr   r   r   _parse_name  s   

zGrammarParser._parse_namec                 C   sR   | j }|   r|  } |   s	| j |krt| dt| j|| j  | fS )NzExpected integer)r   r   isdigitr   r,   r+   r
   r   r   r   r   
_parse_int  s   

zGrammarParser._parse_intrz   c                 C   s   |  rX|  dv r| }nE|r|  dv rt|}n7|  dkrQ| }d}|  rG|  dvrG||  7 }| }|  rG|  dvs3|  j|d 7  _n	 |S |  s|S )Nz 	
#z//r   )r   r   rw   _skip_newlinerx   )r   r   rz   cmtr   r   r   r}   !  s"   
zGrammarParser._skip_spacec                 C   sD   |   dkr|  } |   dkr|  } | S |   dkr |  } | S )Nr   )r   r   )r   r   r   r   r   2  s   zGrammarParser._skip_newliner9   c                 C   s   |   p| dkp| dkS )N-_)isalnum)r9   r   r   r   r   <  s   zGrammarParser._is_word_charc                 C   s   |  |\}}| j|dd}|ddkrt|d|d}| j|dd}| j|dd\}}| |}| j}d| _t||||fS )	NFry   r   z::=zExpected ::=T	is_nestedr   )	r   r}   r   r,   r   _parse_alternativesr   rx   rP   )r   r   rO   rq   r   r   r   r   r~   @  s   


zGrammarParser._parse_ruler   c                 C   s`   g }	 |  ||\}}|| | j||d}| dkrn| }| j|dd}qt||fS )NTry   |)_parse_sequencer   r}   r   r   rp   )r   r   r   rq   sequencer   r   r   r   P  s   
z!GrammarParser._parse_alternativesc                 C   sH  g }|  r|  dvr|s|  dvr|  dkr'| |\}}|| nQ|  dkr:| |\}}|| n>|  dkrO| j||d\}}|| n)|  dkra|td | }n| |  rw| |\}}|t	| nn&| j
||d}| ||}| j
||d}|  r|  dvr|s|  dvst||fS )	Nz|)r   rG   r   rY   r   .ry   )r   r   r   r   _parse_grouprK   r   r   r   rN   r}   _parse_repetitionrb   )r   r   r   rc   rU   rO   r   r   r   r   b  sB   
zGrammarParser._parse_sequencec                 C   sj   |  dkrt|d| }| |d}| j|dd\}}|  dkr)t|d| }|| ||fS )NrY   zExpected '('Tr   rZ   zExpected ')')r   r,   r   r}   r   )r   r   r   rq   r   r   r   r     s   

zGrammarParser._parse_grouprc   c                 C   sh  |s|S |  dkrt|d dd |d< | S |  dkr,t|d dd |d< | S |  dkr@t|d dd|d< | S |  dkr| }| |d}| |\}}| |d}|  d	krqt|d |||d< | S |  d
kr| | d}d }|   r| |\}}| |d}|  d	krt|dt|d |||d< | S t|d|S )Nr[   r   r\   r   r]   r^   Tr`   r_   zExpected '}'zExpected ',' or '}')r   rT   r   r}   r   r   r,   )r   r   rc   rV   rW   r   r   r   r     s>   

zGrammarParser._parse_repetitionr   N)r%   r&   r'   r/   r)   dictrP   r   r	   r   r   r3   r   r   staticmethodr   r+   r   rA   r}   r   r   r~   rp   r   rb   r   r   r   r   r   r   r   r   rw      s@    
*	



"rw   r   r   c                    sx  dt dtdd ffdd}t D ]\}}||_|j |_qdtdtt f fdd  D ]"} |D ]}t	|t
rS|jvrMtd	|j d
|j |_q8q2dvr]td|d d d}|dkrd} D ]}|jdkr|js|j rd|_|d7 }qp|dksjt D ](}|jdd}tdd| }|jr| }n| }|j|kr||| qd S )Nr   rO   r   c                    s2   | v rt d| d | j= || _|  |< d S )NRule 'z' already exists)	ExceptionrO   )r   rO   )r   r   r   rename  s
   zresolve.<locals>.renamerU   c                 3   s(    |   D ]}|V   |E d H  qd S r   )r=   )rU   r9   )all_childrenr   r   r     s
   zresolve.<locals>.all_childrenr   z' not foundrootzNo 'root' rule foundr   r   r   Tr   r   z([a-z])([A-Z])z\1_\2)rP   r)   	enumeratevaluesrv   rq   r@   r3   r   
isinstancerN   rO   r   rQ   rS   r7   rB   replaceresublowerupper)r   r   rl   r   rU   Znum_fixnew_namer   )r   r   r   resolve  sN   





r   r
   c                 C   s   t  }|| }t| t| }|jdd d d}d}|D ] }t|}|s0d|v r0|d7 }||d 7 }d|v }|r@|d7 }q |S )z<
    Convert a GBNF (llama.cpp) grammar to Lark syntax.
    c                 S   rn   r   )rv   )r   r   r   r   <lambda>  s    zgbnf_to_lark.<locals>.<lambda>)keyz%llguidance {}

Tr   )rw   r   r   rB   r   sortr)   )r
   parserr   ZrlistresZprev_nlr   r   r   r   r   gbnf_to_lark  s"   
r   c                 C   s   t d| duS )z6
    Check if the text is already in Lark syntax.
    z!(?m)^\s*(%llguidance\b|start\s*:)N)r   searchr
   r   r   r   is_lark_syntax  s   r   c                 C   s   t | r| S t| S )z5
    Convert a grammar to Lark syntax if needed.
    )r   r   r   r   r   r   any_to_lark	  s   r   c                     sf   dd l } dd l dtdd f fdd}t| jdk r#td | d | jdd  D ]}|| q*d S )	Nr   fnr   c                    s   t |  dddd t| }| }W d    n1 sw   Y  t|} j| d d }t|d}|| W d    n1 sEw   Y  t d d S )	Nz... r   T)endflushr   z.larkwOK)printopenreadr   pathsplitextwrite)r   fr
   ZlarkZfn_larkosr   r   process_file  s   

zmain.<locals>.process_filer   z*Usage: gbnf_to_lark.py <file1> <file2> ...r   )sysr   r)   r   argvr   exit)r   r   r   r   r   r   main  s   


r   __main__r   ) dataclassesr   typingr   r   r   r   abcr   r   r   r	   r   r,   r3   rD   rK   rN   rT   rb   rp   rP   rw   r   r)   r   r   rA   r   r   r   r%   r   r   r   r   <module>   sB     4
	
