o
    lqi_                     @  s  U d Z ddlmZ ddlZddlZddlZddlZg dZedZ	edZ
dRd	d
Ze ZdSddZedZe ZdTddZdUddZe ZedZedZedZi eddeddedd ed!d"ed#d$ed%d&ed'd(ed)d*ed+d,ed-d.ed/d0ed1d2ed3d4ed5d6ed7d8ed9d:ed;d<ed=d>ed?d@edAdBedCdDedEdFiZdVdHdIZe ZdJdKdLdMdNdOZdedP< edQjdWi eejZdS )Xzi
This gives other modules access to the gritty details about characters and the
encodings that use them.
    )annotationsN)
zlatin-1zsloppy-windows-1252zsloppy-windows-1251zsloppy-windows-1250zsloppy-windows-1253zsloppy-windows-1254zsloppy-windows-1257z
iso-8859-2macromancp437u   [ʼ‘-‛]u	   [“-‟]returndict[str, re.Pattern[str]]c                  C  sX   dt di} tD ] }tttdddg }||}d| d}t || |< q	| S )a  
    ENCODING_REGEXES contain reasonably fast ways to detect if we
    could represent a given string in a given encoding. The simplest one is
    the 'ascii' detector, which of course just determines if all characters
    are between U+0000 and U+007F.
    asciiz^[ -]*$         z^[ --z]*$)recompileCHARMAP_ENCODINGSbyteslistrangedecode)Zencoding_regexesencodingZ
byte_rangeZcharlistregex r   T/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/ftfy/chardata.py_build_regexes    s   
r   dict[str, str]c                  C  sj   i } t jj D ]*\}}|dr2|| d| < || kr2| }d| }t ||kr2| | |< q| S )N;&)htmlentitieshtml5itemsendswithlowerupperunescape)r   namecharZ
name_upperZentity_upperr   r   r   _build_html_entities>   s   
r$   z&#?[0-9A-Za-z]{1,24};textstrr   boolc                 C  s   t t| | S )z
    Given text and a single-byte encoding, check whether that text could have
    been decoded from that single-byte encoding.

    In other words, check whether it can be encoded in that encoding, possibly
    sloppily.
    )r'   ENCODING_REGEXESmatch)r%   r   r   r   r   possible_encodingV   s   r*   dict[int, None]c                  C  sJ   i } t tdddgtdddgtddd	gtd
dD ]}d| |< q| S )z
    Build a translate mapping that strips likely-unintended control characters.
    See :func:`ftfy.fixes.remove_control_chars` for a description of these
    codepoint ranges and why they should be removed.
    r   	                ij   ip   i  i  i  N)	itertoolschainr   )Zcontrol_charsir   r   r   _build_control_char_mappinga   s   
	r4   se   [][ ]|[][ ][---]|[-][---][ ]|[][ ][-][-]|[][-][ ][-]|[][-][-][ ]s   [-][]|[-][?]|[-][?][-][?-]|[-][?-][-][?]|[-][?][-]|[-][-][?]|[-][?][-][-]|[-][-][?][-]|[-][-][-][?]|z[\x80-\x9f]u   ĲZIJu   ĳZiju   ŉu   ʼnu   ǱZDZu   ǲZDzu   ǳZdzu   Ǆu   DŽu   ǅu   Džu   ǆu   džu   ǇZLJu   ǈZLju   ǉZlju   ǊZNJu   ǋZNju   ǌZnju   ﬀffu   ﬁfiu   ﬂflu   ﬃffiu   ﬄfflu   ﬅu   ſtu   ﬆstdict[int, str]c                  C  s@   ddi} t ddD ]}t|}td|}||kr|| |< q	| S )zt
    Build a translate mapping that replaces halfwidth and fullwidth forms
    with their standard-width forms.
    i 0   i  i  NFKC)r   chrunicodedata	normalize)Z	width_mapr3   r#   	alternater   r   r   _build_width_map   s   rB   u   ĂÂÄĀÅÃÆĆČÇĎĐÉĚÊËĖÈĒĘÐĞĢÍÎÏİÌĪĶĹĻŁŃŇŅÑÓÔÖŐÒŌØÕŘŚŠŞŢÞÚÛÜŰÙŪŲŮÝŹŽŻß×ΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯu   áăâäàāąåãæćčçďéěêëėèēęęģíîïìīįķĺļŕźΰαβγδεζηθικλμνξοабвгдежзийклмнопu   đðğóšπσруu   -¿ ĄÆĽŁØŖŚŠŞŤŸŹŽŻŒąæƒľłøŗśšşťźžżœˆˇ˘˛˜˝΄΅ΆΈΉΊΌΎΏЁЂЃЄЅІЇЈЉЊЋЌЎЏёђѓєѕіїјљњћќўџҐґ–—―‘’‚“”„†‡•…‰‹›€№™u   -¿ĄÆĽŁØŖŚŠŞŤŸŹŽŻŒąæƒľłøŗśšşťźžżœˆˇ˘˛˜˝΄΅ΆΈΉΊΌΎΏЁЂЃЄЅІЇЈЉЊЋЌЎЏёђѓєѕіїјљњћќўџҐґ†‡‰‹›€№™)Zutf8_first_of_2Zutf8_first_of_3Zutf8_first_of_4Zutf8_continuationZutf8_continuation_strict
UTF8_CLUESz
    (?<! [{utf8_continuation_strict}])
    (
        [{utf8_first_of_2}] [{utf8_continuation}]
        |
        [{utf8_first_of_3}] [{utf8_continuation}]{{2}}
        |
        [{utf8_first_of_4}] [{utf8_continuation}]{{3}}
    )+
    )r   r   )r   r   )r%   r&   r   r&   r   r'   )r   r+   )r   r;   r   )__doc__
__future__r   r   r1   r   r?   r   r   ZSINGLE_QUOTE_REZDOUBLE_QUOTE_REr   r(   r$   ZHTML_ENTITY_REZHTML_ENTITIESr*   r4   ZCONTROL_CHARSZALTERED_UTF8_REZLOSSY_UTF8_REZC1_CONTROL_REordZ	LIGATURESrB   Z	WIDTH_MAPrC   __annotations__formatVERBOSEZUTF8_DETECTOR_REr   r   r   r   <module>   s    






(
	

Ge     &		
