a
    ´iÌR  ã                   @   sÚ   d dl Z d dlZd dlmZ dgZe  d¡Ze  d¡Ze  d¡Ze  d¡Z	e  d¡Z
e  d	¡Ze  d
¡Ze  d¡Ze  d¡Ze  d¡Ze  de j¡Ze  de j¡Ze  de j¡Ze  d
¡Ze  d¡ZG dd„ dejƒZdS )é    N)ÚunescapeÚ
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]z
</[a-zA-Z]ú>z--!?>z-?>z0([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*a{  
  (
    (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
   )
  ([\t\n\r\f ]*=[\t\n\r\f ]*        # value indicator
    ('[^']*'                        # LITA-enclosed value
    |"[^"]*"                        # LIT-enclosed value
    |(?!['"])[^>\t\n\r\f ]*         # bare value
    )
   )?
  (?:[\t\n\r\f ]|/(?!>))*           # possibly followed by a space
a  
  [a-zA-Z][^\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:[\t\n\r\f ]*=[\t\n\r\f ]*    # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
        \s*                          # possibly followed by a space
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                   @   sü   e Zd ZdZdZdddœdd„Zdd	„ Zd
d„ Zdd„ ZdZ	dd„ Z
ddœdd„Zdd„ Zd=dd„Zdd„ Zdd„ Zd>dd„Zd?dd „Zd!d"„ Zd#d$„ Zd%d&„ Zd'd(„ Zd)d*„ Zd+d,„ Zd-d.„ Zd/d0„ Zd1d2„ Zd3d4„ Zd5d6„ Zd7d8„ Zd9d:„ Zd;d<„ ZdS )@r   )ZscriptÚstyleZxmpZiframeZnoembedZnoframes)ZtextareaÚtitleTF)Úconvert_charrefsÚ	scriptingc                C   s   || _ || _|  ¡  d S ©N)r   r   Úreset)Úselfr   r   © r   ú#/usr/lib64/python3.9/html/parser.pyÚ__init__v   s    
zHTMLParser.__init__c                 C   s4   d| _ d| _t| _d | _d| _d| _tj 	| ¡ d S )NÚ z???T)
ÚrawdataÚlasttagÚinteresting_normalÚinterestingÚ
cdata_elemÚ_support_cdataÚ
_escapableÚ_markupbaseÚ
ParserBaser
   ©r   r   r   r   r
   „   s    zHTMLParser.resetc                 C   s   | j | | _ |  d¡ d S )Nr   )r   Úgoahead©r   Údatar   r   r   ÚfeedŽ   s    zHTMLParser.feedc                 C   s   |   d¡ d S )Né   )r   r   r   r   r   Úclose—   s    zHTMLParser.closeNc                 C   s   | j S r	   )Ú_HTMLParser__starttag_textr   r   r   r   Úget_starttag_text   s    zHTMLParser.get_starttag_text©Ú	escapablec                C   sp   |  ¡ | _|| _| jdkr(t d¡| _nD|rP| jsPt d| j tjtjB ¡| _nt d| j tjtjB ¡| _d S )NÚ	plaintextz\Zz&|</%s(?=[\t\n\r\f />])z</%s(?=[\t\n\r\f />]))	Úlowerr   r   ÚreÚcompiler   r   Ú
IGNORECASEÚASCII)r   Úelemr#   r   r   r   Úset_cdata_mode¡   s    



ÿ
ÿzHTMLParser.set_cdata_modec                 C   s   t | _d | _d| _d S )NT)r   r   r   r   r   r   r   r   Úclear_cdata_mode­   s    zHTMLParser.clear_cdata_modec                 C   s
   || _ d S r	   )r   )r   Úflagr   r   r   Ú_set_support_cdata²   s    zHTMLParser._set_support_cdatac                 C   s4  | j }d}t|ƒ}||k rÌ| jrv| jsv| d|¡}|dk r | dt||d ƒ¡}|dkrpt d¡ 	||¡spqÌ|}n*| j
 	||¡}|r’| ¡ }n| jrœqÌ|}||k rÞ| jrÌ| jrÌ|  t|||… ƒ¡ n|  |||… ¡ |  ||¡}||kröqÌ|j}|d|ƒr<t ||¡r"|  |¡}	nŒ|d|ƒr:|  |¡}	nt|d|ƒrR|  |¡}	n\|d|ƒrj|  |¡}	nD|d	|ƒr‚|  |¡}	n,|d
 |k s–|rÌ|  d¡ |d
 }	nqÌ|	dk r.|sÂqÌt ||¡rÔnV|d|ƒr$|d |krú|  d¡ n&t ||¡r
n|  ||d d … ¡ n|d|ƒr||}dD ]*}
| |
|d ¡r8|t|
ƒ8 } qdq8|  ||d |… ¡ n®|d|ƒr¨| jr¨|  ||d d … ¡ n‚|||d …  ¡ dkrÚ|  ||d d … ¡ nP|d	|ƒrþ|  ||d d … ¡ n,|d|ƒr"|  ||d d … ¡ nt dƒ‚|}	|  ||	¡}q|d|ƒrât! ||¡}|r¤| "¡ dd… }|  #|¡ | $¡ }	|d|	d
 ƒs”|	d
 }	|  ||	¡}qn<d||d … v rÌ|  |||d … ¡ |  ||d ¡}qÌq|d|ƒrt% ||¡}|r@| "d
¡}|  &|¡ | $¡ }	|d|	d
 ƒs2|	d
 }	|  ||	¡}qt' ||¡}|rš|rÌ| "¡ ||d … krÌ| $¡ }	|	|kr„|}	|  ||d
 ¡}qÌn.|d
 |k rÌ|  d¡ |  ||d
 ¡}nqÌqq|r"||k r"| jr| jr|  t|||… ƒ¡ n|  |||… ¡ |  ||¡}||d … | _ d S )Nr   ú<ú&é"   z[\t\n\r\f ;]z</ú<!--z<?z<!r   é   )z--!z--ú-é   ú	<![CDATA[é   é	   ú	<!doctypezwe should not get here!z&#éÿÿÿÿú;)(r   Úlenr   r   ÚfindÚrfindÚmaxr&   r'   Úsearchr   Ústartr   Úhandle_datar   Z	updateposÚ
startswithÚstarttagopenÚmatchÚparse_starttagÚparse_endtagÚparse_commentÚparse_piÚparse_html_declarationÚ
endtagopenÚhandle_commentÚendswithr   Úunknown_declr%   Úhandle_declÚ	handle_piÚAssertionErrorÚcharrefÚgroupÚhandle_charrefÚendÚ	entityrefÚhandle_entityrefÚ
incomplete)r   rU   r   ÚiÚnÚjZampposrE   rC   ÚkÚsuffixÚnamer   r   r   r   Â   sâ    
ÿ










zHTMLParser.goaheadc                 C   sX  | j }|||d … dkr$|  |¡S |||d … dkrx| jrx| d|d ¡}|dk rZdS |  ||d |… ¡ |d S |||d …  ¡ d	krÊ| d
|d ¡}|dkr¬dS |  ||d |… ¡ |d S |||d … dkrJ| d
|d ¡}|dk  rþdS ||d  dkr,|  ||d |d … ¡ n|  ||d |… ¡ |d S |  |¡S d S )Nr5   r2   r8   r6   z]]>r   r:   r7   r9   r   r3   r   z<![ú])	r   rH   r   r=   rN   r%   rO   rL   Úparse_bogus_comment)r   rY   r   r[   Zgtposr   r   r   rJ   N  s0    

z!HTMLParser.parse_html_declarationc                 C   s\   | j }t ||d ¡}|s2t ||d ¡}|s2dS |rT| ¡ }|  ||d |… ¡ | ¡ S )Nr5   r:   )r   Úcommentcloser@   ÚcommentabruptcloserE   rA   rL   rU   )r   rY   Úreportr   rE   r[   r   r   r   rH   p  s    zHTMLParser.parse_commentr   c                 C   sD   | j }| d|d ¡}|dkr"dS |r<|  ||d |… ¡ |d S )Nr   r3   r:   r   )r   r=   rL   )r   rY   rc   r   Úposr   r   r   r`     s    zHTMLParser.parse_bogus_commentc                 C   sH   | j }t ||d ¡}|sdS | ¡ }|  ||d |… ¡ | ¡ }|S )Nr3   r:   )r   Úpicloser@   rA   rP   rU   ©r   rY   r   rE   r[   r   r   r   rI   ‹  s    zHTMLParser.parse_pic                 C   s  d | _ |  |¡}|dk r|S | j}|||… | _ g }t ||d ¡}| ¡ }| d¡ ¡  | _}||k r t	 ||¡}|s~q | ddd¡\}	}
}|
sœd }nZ|d d… d  krÀ|dd … ksên |d d… d  kræ|dd … krön n|dd… }|rt
|ƒ}| |	 ¡ |f¡ | ¡ }q`|||…  ¡ }|dvrž|  ¡ \}}d	| j v rz|| j  d	¡ }t| j ƒ| j  d	¡ }n|t| j ƒ }|  |||… ¡ |S | d
¡r¸|  ||¡ n^|  ||¡ || jv sì| jrâ|dksì|dkrü| j|dd n|| jv r| j|dd |S )Nr   r   r3   r7   ú'r:   ú")r   ú/>Ú
ri   Znoscriptr$   Fr"   T)r    Úcheck_for_whole_start_tagr   Útagfind_tolerantrE   rU   rS   r%   r   Úattrfind_tolerantr   ÚappendÚstripZgetposÚcountr<   r>   rB   rM   Úhandle_startendtagÚhandle_starttagÚCDATA_CONTENT_ELEMENTSr   r+   ÚRCDATA_CONTENT_ELEMENTS)r   rY   Úendposr   ÚattrsrE   r\   ÚtagÚmÚattrnameÚrestZ	attrvaluerU   ÚlinenoÚoffsetr   r   r   rF   —  sh    

&ÿ
ÿ



ÿÿÿþzHTMLParser.parse_starttagc                 C   s6   | j }t ||d ¡}| ¡ }||d  dkr2dS |S )Nr   r   r:   )r   ÚlocatetagendrE   rU   rf   r   r   r   rk   Ð  s    z$HTMLParser.check_for_whole_start_tagc                 C   s´   | j }| d|d ¡dk rdS t ||¡sT||d |d … dkrJ|d S |  |¡S t ||d ¡}| ¡ }||d  dkr€dS t ||d ¡}| d¡ 	¡ }|  
|¡ |  ¡  |S )Nr   r3   r   r:   r7   r   )r   r=   rK   rE   r`   r}   rU   rl   rS   r%   Úhandle_endtagr,   )r   rY   r   rE   r[   rw   r   r   r   rG   Ú  s     

zHTMLParser.parse_endtagc                 C   s   |   ||¡ |  |¡ d S r	   )rr   r~   ©r   rw   rv   r   r   r   rq   ø  s    zHTMLParser.handle_startendtagc                 C   s   d S r	   r   r   r   r   r   rr   ý  s    zHTMLParser.handle_starttagc                 C   s   d S r	   r   )r   rw   r   r   r   r~     s    zHTMLParser.handle_endtagc                 C   s   d S r	   r   ©r   r^   r   r   r   rT     s    zHTMLParser.handle_charrefc                 C   s   d S r	   r   r€   r   r   r   rW   	  s    zHTMLParser.handle_entityrefc                 C   s   d S r	   r   r   r   r   r   rB     s    zHTMLParser.handle_datac                 C   s   d S r	   r   r   r   r   r   rL     s    zHTMLParser.handle_commentc                 C   s   d S r	   r   )r   Zdeclr   r   r   rO     s    zHTMLParser.handle_declc                 C   s   d S r	   r   r   r   r   r   rP     s    zHTMLParser.handle_pic                 C   s   d S r	   r   r   r   r   r   rN     s    zHTMLParser.unknown_decl)T)T)r   ) Ú__name__Ú
__module__Ú__qualname__rs   rt   r   r
   r   r   r    r!   r+   r,   r.   r   rJ   rH   r`   rI   rF   rk   rG   rq   rr   r~   rT   rW   rB   rL   rO   rP   rN   r   r   r   r   r   Z   s<   
	
 "

9
)r&   r   Zhtmlr   Ú__all__r'   r   rX   rV   rR   rD   rK   re   ra   rb   rl   ÚVERBOSErm   r}   Zlocatestarttagend_tolerantZ	endendtagZ
endtagfindr   r   r   r   r   r   Ú<module>   s2   










õóò

