o
    0 i                     @   s   d dl mZ d dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZmZ d dlmZmZ d dlmZmZmZ d dlmZmZmZ d dlmZmZ eG d	d
 d
ZG dd dZdS )    )
namedtuple)	dataclass)combinations)ListTupleAnyDictIterableSet	FrozenSetOptional)InvalidSyntaxREFlags)FSMAlphabetanything_else)PatternUnsupportedparse_pattern)logger	soft_reprc                	   @   sV   e Zd ZU dZeed< eed< eed< 		dded	ed
edefddZedd Z	dS )ExampleCollisiona  
    Captures the full text of an example collision between two regex.
    `main_text` is the part that actually gets captured by the two regex
    `prefix` is the part that is potentially needed for lookbehinds
    `postfix` is the part that is potentially needed for lookahead
    prefix	main_textpostfixExample Collision:  Fintroindentforce_pointerreturnc           
      C   s   t |t |k rtdt| j}t| j}t| j}| | | }t |t |krLdt |t | t |  }dt | }	| | d| | |	 S | | S )an  
        Formats this example somewhat similar to a python syntax error.
        - intro is added on the first line
        - indent is added on the second line
        The three parts of the example are concatenated and `^` is used to underline them.

        ExampleCollision(prefix='a', main_text='cd', postfix='ef').format_multiline()

        leads to

        Example Collision: acdef
                             ^^

        This function will escape the character where necessary to stay readable.
        if `force_pointer` is False, the function will not produce the second line if only main_text is set
        z'Can't have intro be shorter than indent ^
)len
ValueErrorr   r   r   r   )
selfr   r   r   r   r   r   text
whitespaceZpointers r)   b/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/interegular/comparator.pyformat_multiline   s   


z!ExampleCollision.format_multilinec                 C   s   | j | j | j S N)r   r   r   r&   r)   r)   r*   	full_text7   s   zExampleCollision.full_textN)r   r   F)
__name__
__module____qualname____doc__str__annotations__boolr+   propertyr.   r)   r)   r)   r*   r      s   
 
r   c                
   @   s   e Zd ZdZdeeef fddZdedefddZ	ded	ede
fd
dZd dee de
deeeef  fddZd!ded	ededefddZded	ede
fddZedd Zdd Zded	efddZedeeef fddZdS )"
Comparatora  
    A class that represents the main interface for comparing a list of regex to each other.
    It expects a dictionary of arbitrary labels mapped to `Pattern` instances,
    but there is a utility function to create the instances `from_regex` strings.

    The main interface function all expect the abitrary labels to be given, which
    then get mapped to the correct `Pattern` and/or `FSM` instance.

    There is a utility function `mark(a,b)` which allows to mark pairs that shouldn't
    be checked again by `check`.
    patternsc                 C   s|   || _ t | _|sd S tjdd | D  d | _dd | D }tdd |D tdd |D f| _i | _	i | _
d S )Nc                 s   s    | ]
}| td V  qdS r   N)get_alphabetr   .0pr)   r)   r*   	<genexpr>N   s    z&Comparator.__init__.<locals>.<genexpr>r   c                 S   s   g | ]}|j qS r)   )prefix_postfixr;   r)   r)   r*   
<listcomp>O   s    z'Comparator.__init__.<locals>.<listcomp>c                 s       | ]}|d  V  qdS r9   r)   r;   r)   r)   r*   r>   P       c                 s   rA   )   Nr)   r;   r)   r)   r*   r>   P   rB   )	_patternsset_marked_pairsr   unionvalues	_alphabetmax_prefix_postfix_fsms_know_pairs)r&   r8   Zprefix_postfix_sr)   r)   r*   __init__I   s   &
zComparator.__init__ar    c              
   C   s   || j vrGz| j| | j| j| j |< W n1 ty9 } zd | j |< td| dt|  W Y d }~nd }~w t	yF   d | j |< Y nw | j | S )Nz!Can't compile Pattern to fsm for 
     )
rL   rD   to_fsmrI   rK   r   r   warningreprKeyError)r&   rO   er)   r)   r*   get_fsmT   s   
 
&
zComparator.get_fsmbc                 C   sf   ||f| j vr,| || |}}|d u s|d u r"d| j ||f< n
||| j ||f< | j ||f S )NT)rM   rV   
isdisjoint)r&   rO   rW   fafbr)   r)   r*   rX   _   s   zComparator.isdisjointNFkeysskip_markedc                 c   sP    |d u r| j }t|dD ]\}}|r| ||rq| ||s%||fV  qd S )N   )rD   r   	is_markedrX   )r&   r[   r\   rO   rW   r)   r)   r*   checkh   s   
zComparator.checkmax_timec              	   C   sN  | j | | j | }}t|jd |jd }t|jd |jd }|td|tdd }||||f||||f}	}
|	|
}|d u rOd }nt|d dt	|  }z	t
||}W n tyv   td| d| dw dd	d
 |D }|dkrt|d | |||  || d  S t|d | ||d  dS )Nr   rC   g
ףp=
?g_eG|>zNo overlap between z and z existsr   c                 s   s     | ]}|t kr
|nd V  qdS )?N)r   )r<   cr)   r)   r*   r>      s    z1Comparator.get_example_overlap.<locals>.<genexpr>)rD   rJ   r?   r:   r   rG   rQ   intersectionintr$   nextstringsStopIterationr%   joinr   )r&   rO   rW   r`   paZpbZ
needed_preZneeded_postalphabetrY   rZ   rc   Zmax_iterationsr'   r)   r)   r*   get_example_overlapq   s$   ""
(zComparator.get_example_overlapc                 C   s   t ||h| jv S r,   )	frozensetrF   r&   rO   rW   r)   r)   r*   r^      s   zComparator.is_markedc                 C   s   | j S r,   )rF   r-   r)   r)   r*   marked_pairs   s   zComparator.marked_pairsc                 C   s
   t | jS r,   )r$   rF   r-   r)   r)   r*   count_marked_pairs   s   
zComparator.count_marked_pairsc                 C   s   | j t||h d S r,   )rF   addrl   rm   r)   r)   r*   mark   s   zComparator.markregexesc                 C   sp   i }|  D ]-\}}zt|||< W q ttfy3 } ztd| dt|  W Y d }~qd }~ww | |S )Nz#Can't compile regex to Pattern for rP   )itemsr   r   r   r   rR   rS   )clsrr   r8   krrU   r)   r)   r*   from_regexes   s   &zComparator.from_regexes)NFr,   )r/   r0   r1   r2   r   r   r   rN   r   rV   r5   rX   r	   r   r_   floatr   rk   r^   r6   rn   ro   rq   classmethodr3   rw   r)   r)   r)   r*   r7   <   s    (		
r7   N)collectionsr   dataclassesr   	itertoolsr   typingr   r   r   r   r	   r
   r   r   Zinteregularr   r   Zinteregular.fsmr   r   r   Zinteregular.patternsr   r   r   Zinteregular.utilsr   r   r   r7   r)   r)   r)   r*   <module>   s    (/