B
    -¦a÷†  ã            *   @   s`  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZdd„ Zdddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-œ)Z	d.d/„ Z
G d0d1„ d1eƒZd2d3„ ZG d4d5„ d5e d6d7d8g¡ƒZG d9d:„ d:eƒZG d;d<„ d<ƒZG d=d>„ d>ƒZG d?d@„ d@eƒZG dAdB„ dBejƒZG dCdD„ dDƒZG dEdF„ dFƒZdGdH„ ZG dIdJ„ dJƒZdS )Ké    Nc             C   s   t j|  d¡ S )NÚ	utf_16_be)ÚcodecsÚBOM_UTF16_BEÚencode)Ús© r   ú1/tmp/pip-unpacked-wheel-rysjrues/PIL/PdfParser.pyÚencode_text   s    r	   úu   Ë˜u   Ë‡u   Ë†u   Ë™u   Ëu   Ë›u   Ëšu   Ëœu   â€¢u   â€ u   â€¡u   â€¦u   â€”u   â€“u   Æ’u   â„u   â€¹u   â€ºu   âˆ’u   â€°u   â€žu   â€œu   â€u   â€˜u   â€™u   â€šu   â„¢u   ï¬u   ï¬‚u   Åu   Å’u   Å u   Å¸u   Å½u   Ä±u   Å‚u   Å“u   Å¡u   Å¾u   â‚¬))é   é   é   é   é   é   é   é   é   é€   é   é‚   éƒ   é„   é…   é†   é‡   éˆ   é‰   éŠ   é‹   éŒ   é   éŽ   é   é   é‘   é’   é“   é”   é•   é–   é—   é˜   é™   éš   é›   éœ   é   éž   é    c             C   sH   | d t tjƒ… tjkr0| t tjƒd …  d¡S d dd„ | D ƒ¡S d S )Nr   Ú c             s   s   | ]}t  |t|ƒ¡V  qd S )N)ÚPDFDocEncodingÚgetÚchr)Ú.0Úbyter   r   r   ú	<genexpr>B   s    zdecode_text.<locals>.<genexpr>)Úlenr   r   ÚdecodeÚjoin)Úbr   r   r   Údecode_text>   s    r?   c               @   s   e Zd ZdZdS )ÚPdfFormatErrorz\An error that probably indicates a syntactic or semantic error in the
    PDF file structureN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r@   E   s   r@   c             C   s   | st |ƒ‚d S )N)r@   )Ú	conditionÚerror_messager   r   r   Úcheck_format_conditionL   s    rG   c               @   s4   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ ZdS )ÚIndirectReferencec             C   s   d|  S )Nz%s %s Rr   )Úselfr   r   r   Ú__str__T   s    zIndirectReference.__str__c             C   s   |   ¡  d¡S )Nzus-ascii)rJ   r   )rI   r   r   r   Ú	__bytes__W   s    zIndirectReference.__bytes__c             C   s$   |j | j ko"|j| jko"|j| jkS )N)Ú	__class__Ú	object_idÚ
generation)rI   Úotherr   r   r   Ú__eq__Z   s    zIndirectReference.__eq__c             C   s
   | |k S )Nr   )rI   rO   r   r   r   Ú__ne__a   s    zIndirectReference.__ne__c             C   s   t | j| jfƒS )N)ÚhashrM   rN   )rI   r   r   r   Ú__hash__d   s    zIndirectReference.__hash__N)rA   rB   rC   rJ   rK   rP   rQ   rS   r   r   r   r   rH   Q   s
   rH   ZIndirectReferenceTuplerM   rN   c               @   s   e Zd Zdd„ ZdS )ÚIndirectObjectDefc             C   s   d|  S )Nz	%s %s objr   )rI   r   r   r   rJ   i   s    zIndirectObjectDef.__str__N)rA   rB   rC   rJ   r   r   r   r   rT   h   s   rT   c               @   sL   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Z	dd„ Z
dS )Ú	XrefTablec             C   s    i | _ i | _ddi| _d| _d S )Nr   i   F)Úexisting_entriesÚnew_entriesÚdeleted_entriesÚreading_finished)rI   r   r   r   Ú__init__n   s    
zXrefTable.__init__c             C   s2   | j r|| j|< n
|| j|< || jkr.| j|= d S )N)rY   rW   rV   rX   )rI   ÚkeyÚvaluer   r   r   Ú__setitem__t   s
    

zXrefTable.__setitem__c             C   s*   y
| j | S  tk
r$   | j| S X d S )N)rW   ÚKeyErrorrV   )rI   r[   r   r   r   Ú__getitem__|   s    
zXrefTable.__getitem__c             C   s†   || j kr0| j | d d }| j |= || j|< nR|| jkrX| j| d d }|| j|< n*|| jkrn| j| }ntdt|ƒ d ƒ‚d S )Né   z
object ID z+ cannot be deleted because it doesn't exist)rW   rX   rV   Ú
IndexErrorÚstr)rI   r[   rN   r   r   r   Ú__delitem__‚   s    


zXrefTable.__delitem__c             C   s   || j kp|| jkS )N)rV   rW   )rI   r[   r   r   r   Ú__contains__‘   s    zXrefTable.__contains__c             C   s.   t t| j ¡ ƒt| j ¡ ƒB t| j ¡ ƒB ƒS )N)r;   ÚsetrV   ÚkeysrW   rX   )rI   r   r   r   Ú__len__”   s    zXrefTable.__len__c             C   s*   t | j ¡ ƒt | j ¡ ƒ t | j ¡ ƒB S )N)re   rV   rf   rX   rW   )rI   r   r   r   rf   ›   s    zXrefTable.keysc          	   C   sX  t t| j ¡ ƒt| j ¡ ƒB ƒ}t t| j ¡ ƒƒ}| ¡ }| d¡ x|rRd }xPt|ƒD ]<\}}|d ksx|d |kr~|}q\|d |… }||d … }P q\W |}d }| d|d t|ƒf ¡ x|D ]ˆ}	|	| jkrè| d| j|	  ¡ qÄ| 	d¡}
t
|	|
kd|	› d|
› ƒ y|d }W n tk
r2   d}Y nX | d|| j|	 f ¡ qÄW qHW |S )	Ns   xref
r`   s   %d %d
r   s   %010d %05d n 
z*expected the next deleted object ID to be z, instead found s   %010d %05d f 
)Úsortedre   rW   rf   rX   ÚtellÚwriteÚ	enumerater;   ÚpoprG   ra   )rI   Úfrf   Zdeleted_keysZ	startxrefÚprevÚindexr[   Zcontiguous_keysrM   Zthis_deleted_object_idZnext_in_linked_listr   r   r   rj       s<     





zXrefTable.writeN)rA   rB   rC   rZ   r]   r_   rc   rd   rg   rf   rj   r   r   r   r   rU   m   s   rU   c               @   sd   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zedd„ ƒZ	e
eddƒƒdd„ dD ƒ Zdd„ ZdS )ÚPdfNamec             C   s6   t |tƒr|j| _nt |tƒr&|| _n| d¡| _d S )Nzus-ascii)Ú
isinstancerp   ÚnameÚbytesr   )rI   rr   r   r   r   rZ   É   s
    


zPdfName.__init__c             C   s   | j  d¡S )Nzus-ascii)rr   r<   )rI   r   r   r   Úname_as_strÑ   s    zPdfName.name_as_strc             C   s    t |tƒr|j| jkp|| jkS )N)rq   rp   rr   )rI   rO   r   r   r   rP   Ô   s    zPdfName.__eq__c             C   s
   t | jƒS )N)rR   rr   )rI   r   r   r   rS   Ù   s    zPdfName.__hash__c             C   s   dt | jƒ› dS )NzPdfName(ú))Úreprrr   )rI   r   r   r   Ú__repr__Ü   s    zPdfName.__repr__c             C   s   | t  |¡ƒS )N)Ú	PdfParserÚinterpret_name)ÚclsÚdatar   r   r   Úfrom_pdf_streamß   s    zPdfName.from_pdf_streamé!   é   c             C   s   h | ]}t |ƒ’qS r   )Úord)r8   Úcr   r   r   ú	<setcomp>ã   s    zPdfName.<setcomp>z#%/()<>[]{}c             C   sD   t dƒ}x2| jD ](}|| jkr*| |¡ q| d| ¡ qW t|ƒS )Nó   /s   #%02X)Ú	bytearrayrr   Úallowed_charsÚappendÚextendrs   )rI   Úresultr>   r   r   r   rK   å   s    
zPdfName.__bytes__N)rA   rB   rC   rZ   rt   rP   rS   rw   Úclassmethodr|   re   Úranger„   rK   r   r   r   r   rp   È   s   rp   c               @   s   e Zd Zdd„ ZdS )ÚPdfArrayc             C   s   dd  dd„ | D ƒ¡ d S )Ns   [ ó    c             s   s   | ]}t |ƒV  qd S )N)Úpdf_repr)r8   Úxr   r   r   r:   ñ   s    z%PdfArray.__bytes__.<locals>.<genexpr>s    ])r=   )rI   r   r   r   rK   ð   s    zPdfArray.__bytes__N)rA   rB   rC   rK   r   r   r   r   rŠ   ï   s   rŠ   c               @   s$   e Zd Zdd„ Zdd„ Zdd„ ZdS )ÚPdfDictc             C   s,   |dkrt j | ||¡ n|| | d¡< d S )Nr{   zus-ascii)ÚcollectionsÚUserDictÚ__setattr__r   )rI   r[   r\   r   r   r   r‘   õ   s    zPdfDict.__setattr__c          
   C   s,  y| |  d¡ }W n, tk
r> } zt|ƒ|‚W d d }~X Y nX t|tƒrRt|ƒ}| d¡r(| d¡rt|dd … }d}t|ƒdkrÀ|d }t	|dd… ƒd	 }t|ƒd
krÀ|t	|dd
… ƒ7 }dd t|ƒd … }t
 |d t|ƒd … |¡}|dkr(|d	9 }|dkr|d9 }t
 t |¡| ¡}|S )Nzus-asciiÚDatezD:é   ÚZé   é   é   é<   é   é   z%Y%m%d%H%M%S)ú+ú-r›   éÿÿÿÿ)r   r^   ÚAttributeErrorrq   rs   r?   ÚendswithÚ
startswithr;   ÚintÚtimeÚstrptimeÚgmtimeÚcalendarÚtimegm)rI   r[   r\   ÚeZrelationshipÚoffsetÚformatr   r   r   Ú__getattr__û   s.    



zPdfDict.__getattr__c             C   sr   t dƒ}xV|  ¡ D ]J\}}|d kr$qt|ƒ}| d¡ | tt|ƒƒ¡ | d¡ | |¡ qW | d¡ t|ƒS )Ns   <<ó   
r‹   s   
>>)rƒ   ÚitemsrŒ   r†   rs   rp   )rI   Úoutr[   r\   r   r   r   rK     s    


zPdfDict.__bytes__N)rA   rB   rC   r‘   rª   rK   r   r   r   r   rŽ   ô   s   rŽ   c               @   s   e Zd Zdd„ Zdd„ ZdS )Ú	PdfBinaryc             C   s
   || _ d S )N)r{   )rI   r{   r   r   r   rZ   %  s    zPdfBinary.__init__c             C   s   dd  dd„ | jD ƒ¡ S )Ns   <%s>ó    c             s   s   | ]}d | V  qdS )s   %02XNr   )r8   r>   r   r   r   r:   )  s    z&PdfBinary.__bytes__.<locals>.<genexpr>)r=   r{   )rI   r   r   r   rK   (  s    zPdfBinary.__bytes__N)rA   rB   rC   rZ   rK   r   r   r   r   r®   $  s   r®   c               @   s   e Zd Zdd„ Zdd„ ZdS )Ú	PdfStreamc             C   s   || _ || _d S )N)Ú
dictionaryÚbuf)rI   r±   r²   r   r   r   rZ   -  s    zPdfStream.__init__c             C   s†   y| j j}W n tk
r"   | jS X |dkrjy| j j}W n tk
rT   | j j}Y nX tj| jt|ƒdS t	dt
| j jƒ› dƒ‚d S )Ns   FlateDecode)Úbufsizezstream filter z unknown/unsupported)r±   ÚFilterrž   r²   ZDLÚLengthÚzlibÚ
decompressr¡   ÚNotImplementedErrorrv   )rI   ÚfilterZexpected_lengthr   r   r   r<   1  s    zPdfStream.decodeN)rA   rB   rC   rZ   r<   r   r   r   r   r°   ,  s   r°   c             C   s  | dkrdS | dkrdS | d kr$dS t | ttttfƒr>t| ƒS t | tƒrVt| ƒ d¡S t | t	ƒrnt| ƒ d¡S t | t
jƒr”dt
 d| ¡ d¡ d	 S t | tƒrªtt| ƒƒS t | tƒrÀtt| ƒƒS t | tƒrÖtt| ƒƒS t | tƒr|  d
d¡} |  dd¡} |  d	d¡} d|  d	 S t| ƒS d S )NTs   trueFs   falses   nullzus-asciis   (D:z%Y%m%d%H%M%SZó   )ó   \s   \\ó   (s   \(s   \))rq   rp   rŽ   rŠ   r®   rs   r¡   rb   r   Úfloatr¢   Ústruct_timeÚstrftimeÚdictÚlistrŒ   r	   Úreplace)r   r   r   r   rŒ   B  s4    




rŒ   c                @   sî  e Zd ZdZdrdd„Zdd„ Zd	d
„ Zdd„ Zdd„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Zdsdd„Zdd„ Zdd „ Zd!d"„ Zed#d$„ ƒZd%d&„ Zdtd'd(„Zd)Zd*Zd+Zd,Zed- Zed. Zd/Zd0Zee e Ze  !ed1 e d2 e d3 e d4 e d5 e d6 e j"¡Z#e  !ed1 e d7 e d3 e d4 e d5 e e j"¡Z$d8d9„ Z%d:d;„ Z&e  !e¡Z'e  !ed< e d= ¡Z(e  !ed> ¡Z)e  !ed? e ¡Z*e+d@dA„ ƒZ,e  !dB¡Z-e+dudDdE„ƒZ.e  !edF e d= ¡Z/e  !edG e d= ¡Z0e  !edH e d= ¡Z1e  !edI e d= ¡Z2e  !edJ e d= ¡Z3e  !edK ¡Z4e  !edL ¡Z5e  !edM e dN ¡Z6e  !edO ¡Z7e  !edP e dP e dQ e d= ¡Z8e  !edP e dP e dR e d= ¡Z9e  !edS e d= ¡Z:e  !dTe dU e dV ¡Z;e  !edW ¡Z<e  !edX e d= ¡Z=e+dvdZd[„ƒZ>e  !d\¡Z?d]d^d_d`dadbdcdddedfdTdTd=d=dgdge@d]ƒd^e@d_ƒd`e@daƒdbe@dcƒdde@deƒdfe@dTƒdTe@d=ƒd=e@dgƒdgiZAe+dhdi„ ƒZBe  !edj e ¡ZCe  !ed4 e d4 e e ¡ZDe  !dk¡ZEdldm„ ZFdwdndo„ZGdxdpdq„ZHdS )yrx   z|Based on
    https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    Supports PDF up to 1.4
    Nr   Úrbc             C   s  |r|rt dƒ‚|| _|| _|| _|| _d| _d| _|d k	rZ|d krZt||ƒ | _}d| _|d k	rŽ|  |¡ | _}d| _|sŽt	|dƒrŽ|j
| _i | _|r¢|  ¡  nTd | _| _tƒ | _d | _tƒ | _d | _i | _g | _g | _d | _d | _i | _tƒ | _d| j_|r|  ¡  d S )Nz4specify buf or f or filename, but not both buf and fFTrr   r   )ÚRuntimeErrorÚfilenamer²   rm   Ústart_offsetÚshould_close_bufÚshould_close_fileÚopenÚget_buf_from_fileÚhasattrrr   Úcached_objectsÚread_pdf_infoÚfile_size_totalÚfile_size_thisrŽ   ÚrootÚroot_refÚinfoÚinfo_refÚpage_tree_rootÚpagesÚ
orig_pagesÚ	pages_refÚlast_xref_section_offsetÚtrailer_dictrU   Ú
xref_tablerY   Úseek_end)rI   rÅ   rm   r²   rÆ   Úmoder   r   r   rZ   g  sD    
zPdfParser.__init__c             C   s   | S )Nr   )rI   r   r   r   Ú	__enter__Œ  s    zPdfParser.__enter__c             C   s   |   ¡  dS )NF)Úclose)rI   Úexc_typeÚ	exc_valueÚ	tracebackr   r   r   Ú__exit__  s    zPdfParser.__exit__c             C   s   |   ¡  |  ¡  d S )N)Ú	close_bufrÛ   )rI   r   r   r   Ústart_writing“  s    zPdfParser.start_writingc             C   s.   y| j  ¡  W n tk
r"   Y nX d | _ d S )N)r²   rÞ   rž   )rI   r   r   r   rã   —  s
    zPdfParser.close_bufc             C   s2   | j r|  ¡  | jd k	r.| jr.| j ¡  d | _d S )N)rÇ   rã   rm   rÈ   rÞ   )rI   r   r   r   rÞ   ž  s
    
zPdfParser.closec             C   s   | j  dtj¡ d S )Nr   )rm   ÚseekÚosÚSEEK_END)rI   r   r   r   rÛ   ¥  s    zPdfParser.seek_endc             C   s   | j  d¡ d S )Ns	   %PDF-1.4
)rm   rj   )rI   r   r   r   Úwrite_header¨  s    zPdfParser.write_headerc             C   s   | j  d|› d d¡¡ d S )Nz% Ú
zutf-8)rm   rj   r   )rI   r   r   r   r   Úwrite_comment«  s    zPdfParser.write_commentc             C   sl   |   ¡  |  | j ¡ ¡| _|  d¡| _|  ¡  | j| jtdƒ| jd | j| jtdƒt	| j
ƒ| j
d | jS )Nr   s   Catalog)ÚTypeZPagess   Pages)rë   ZCountZKids)Údel_rootÚnext_object_idrm   ri   rÑ   r×   Úrewrite_pagesÚ	write_objrp   r;   rÕ   )rI   r   r   r   Úwrite_catalog®  s    
zPdfParser.write_catalogc             C   s  g }x®t | jƒD ] \}}| j| }| j|j= | |tdƒ ¡ || jkrJqi }x | ¡ D ]\}}||| 	¡ < qXW | j
|d< | jd|Ž}x(t | jƒD ]\}	}
|
|kr’|| j|	< q’W qW xB|D ]:}x4|rò| j| }|j| jkrä| j|j= | dd ¡}qÀW qºW g | _d S )Ns   ParentÚParent)N)rk   rÖ   rÌ   rÚ   rM   r…   rp   rÕ   r¬   rt   r×   Ú
write_pager6   )rI   Zpages_tree_nodes_to_deleteÚiZpage_refZ	page_infoZstringified_page_infor[   r\   Znew_page_refÚjZcur_page_refZpages_tree_node_refZpages_tree_noder   r   r   rî   ¼  s,    






zPdfParser.rewrite_pagesc             C   sš   |r|   ¡  || _| jr(|  d | j¡| _| j | j¡}t| jƒ}| j|dœ}| j	d k	r`| j	|d< | jrp| j|d< || _	| j dt
t|ƒƒ d|  ¡ d S )N)s   Roots   Sizes   Prevs   Infos   trailer
s   
startxref
%d
%%%%EOF)rì   rÑ   rÒ   rï   rÓ   rÚ   rj   rm   r;   rØ   rs   rŽ   )rI   Znew_root_refZ
start_xrefZnum_entriesrÙ   r   r   r   Úwrite_xref_and_trailerÙ  s    



z PdfParser.write_xref_and_trailerc             O   sL   t |tƒr| j| }d|kr(tdƒ|d< d|kr:| j|d< | j|f|ž|ŽS )Nrë   s   Pagerñ   )rq   r¡   rÕ   rp   r×   rï   )rI   ÚrefÚobjsÚdict_objr   r   r   rò   í  s    


zPdfParser.write_pagec             O   sÈ   | j }|d kr|  | ¡ ¡}n| ¡ |jf| j|j< | tt|Ž ƒ¡ | 	dd ¡}|d k	rft
|ƒ|d< |rx| t|ƒ¡ x|D ]}| t|ƒ¡ q~W |d k	rº| d¡ | |¡ | d¡ | d¡ |S )NÚstreamrµ   s   stream
s   
endstream
s   endobj
)rm   rí   ri   rN   rÚ   rM   rj   rs   rT   rl   r;   rŒ   )rI   rö   r÷   rø   rm   rù   Úobjr   r   r   rï   ö  s$    




zPdfParser.write_objc             C   s.   | j d krd S | j| j j= | j| jd j= d S )Ns   Pages)rÑ   rÚ   rM   rÐ   )rI   r   r   r   rì     s    
zPdfParser.del_rootc             C   sT   t | dƒr|  ¡ S t | dƒr$|  ¡ S ytj|  ¡ dtjdS  tk
rN   dS X d S )NÚ	getbufferÚgetvaluer   )Úaccessr¯   )rË   rû   rü   ÚmmapÚfilenoZACCESS_READÚ
ValueError)rm   r   r   r   rÊ     s    

zPdfParser.get_buf_from_filec             C   sü   t | jƒ| _| j| j | _|  ¡  | jd | _| j dd ¡| _	t
|  | j¡ƒ| _| j	d krdt
ƒ | _nt
|  | j	¡ƒ| _td| jkdƒ t| jd dkdƒ td| jkdƒ tt| jd tƒd	ƒ | jd | _|  | j¡| _|  | j¡| _| jd d … | _d S )
Ns   Roots   Infos   Typez/Type missing in Roots   Catalogz/Type in Root is not /Catalogs   Pagesz/Pages missing in Rootz+/Pages in Root is not an indirect reference)r;   r²   rÎ   rÆ   rÏ   Úread_trailerrÙ   rÑ   r6   rÓ   rŽ   Úread_indirectrÐ   rÒ   rG   rq   rH   r×   rÔ   Úlinearize_page_treerÕ   rÖ   )rI   r   r   r   rÍ     s(    

zPdfParser.read_pdf_infoc             C   sX   yt t| j ¡ ƒd dƒ}W n tk
r:   t ddƒ}Y nX |d k	rT|df| j|j< |S )Nr`   r   )rH   ÚmaxrÚ   rf   r   rM   )rI   r¨   Ú	referencer   r   r   rí   9  s    zPdfParser.next_object_ids   [][()<>{}/%]s$   [][()<>{}/%\000\011\012\014\015\040]s   [\000\011\012\014\015\040]s#   [\000\011\012\014\015\0400-9a-fA-F]ó   *ó   +s   [\000\011\014\015\040]*s   [\r\n]+s   trailers   \<\<(.*\>\>)s	   startxrefs   ([0-9]+)s   %%EOFó   $s   \<\<(.*?\>\>)c             C   sÄ   t | jƒd }|| jk r| j}| j | j|¡}t|dƒ |}x$|r`|}| j | j| ¡ d ¡}q>W |sj|}| d¡}t| d¡ƒ| _	|  
|¡| _tƒ | _| j| j	d d| jkrÀ|  | jd ¡ d S )Ni @  ztrailer end not foundé   r`   r“   )Úxref_section_offsets   Prev)r;   r²   rÆ   Úre_trailer_endÚsearchrG   ÚstartÚgroupr¡   rØ   Úinterpret_trailerrÙ   rU   rÚ   Úread_xref_tableÚread_prev_trailer)rI   Zsearch_start_offsetÚmZ
last_matchÚtrailer_datar   r   r   r  j  s$    



zPdfParser.read_trailerc             C   sv   | j |d}| j | j||d … ¡}t|dƒ | d¡}tt| d¡ƒ|kdƒ |  |¡}d|krr|  |d ¡ d S )N)r
  i @  zprevious trailer not foundr`   r“   zGxref section offset in previous trailer doesn't match what was expecteds   Prev)	r  Úre_trailer_prevr  r²   rG   r  r¡   r  r  )rI   r
  Ztrailer_offsetr  r  rÙ   r   r   r   r    s    


zPdfParser.read_prev_trailers   /([!-$&'*-.0-;=?-Z\\^-z|~]+)(?=rº   s   \<\<s   \>\>c             C   sÂ   i }d}x|| j  ||¡}|sV| j ||¡}t|o>| ¡ t|ƒkdt||d … ƒ ƒ P |  | d¡¡}|  	|| ¡ ¡\}}|||< q
W td|koœt
|d tƒdƒ td|ko¸t
|d tƒdƒ |S )Nr   z+name not found in trailer, remaining data: r`   s   Sizez&/Size not in trailer or not an integers   Rootz1/Root not in trailer or not an indirect reference)Úre_nameÚmatchÚre_dict_endrG   Úendr;   rv   ry   r  Ú	get_valuerq   r¡   rH   )rz   r  Útrailerr¨   r  r[   r\   r   r   r   r  ˜  s*    zPdfParser.interpret_trailers   ([^#]*)(#([0-9a-fA-F]{2}))?Fc             C   sr   d}xR| j  |¡D ]B}| d¡rF|| d¡t | d¡ d¡¡ 7 }q|| d¡7 }qW |rf| d¡S t|ƒS d S )Nr¯   é   r`   zus-asciizutf-8)Úre_hashes_in_nameÚfinditerr  rƒ   Úfromhexr<   rs   )rz   ÚrawZas_textrr   r  r   r   r   ry   µ  s    
&
zPdfParser.interpret_names   null(?=s   true(?=s   false(?=s   ([-+]?[0-9]+)(?=s)   ([-+]?([0-9]+\.[0-9]*|[0-9]*\.[0-9]+))(?=s   \[ó   ]s   \<(s   *)\>s   \(s   ([-+]?[0-9]+)s   R(?=s   obj(?=s	   endobj(?=r¼   s	   %[^\r\n]*s   )*s   stream\r?\ns   endstream(?=r   c          
   C   sè  |dkrdS | j  ||¡}|r&| ¡ }| j ||¡}|rètt| d¡ƒdkdƒ tt| d¡ƒdkdƒ t|d kp’|tt| d¡ƒt| d¡ƒƒkdƒ | j|| ¡ |d d\}}|d krÄ|d fS | j	 ||¡}t|d	ƒ || ¡ fS t| d
ƒ | j
 ||¡}|r^tt| d¡ƒdkdƒ tt| d¡ƒdkdƒ tt| d¡ƒt| d¡ƒƒ| ¡ fS | j ||¡}|rÒ| ¡ }i }| j ||¡}xv|s| j|||d d\}}|d kr¾|d fS | j|||d d\}	}|	||< |d krð|d fS | j ||¡}qŽW | ¡ }| j ||¡}|rÂyt|d ƒ}
W n@ tttfk
rp } ztd| dd ¡ ƒ|‚W d d }~X Y nX || ¡ | ¡ |
 … }| j || ¡ |
 ¡}t|dƒ | ¡ }tt|ƒ|ƒ}nt|ƒ}||fS | j ||¡}|r\| ¡ }g }| j ||¡}xN|sN| j|||d d\}	}| |	¡ |d kr<|d fS | j ||¡}qW || ¡ fS | j ||¡}|r|d | ¡ fS | j ||¡}|rœd| ¡ fS | j ||¡}|r¼d| ¡ fS | j ||¡}|rìt|  | d¡¡ƒ| ¡ fS | j ||¡}|rt| d¡ƒ| ¡ fS | j  ||¡}|r@t!| d¡ƒ| ¡ fS | j" ||¡}|r¤t#dd„ | d¡D ƒƒ}t$|ƒd dkrŒ| t%dƒ¡ t# &| 'd¡¡| ¡ fS | j( ||¡}|rÈ|  )|| ¡ ¡S tdt*|||d … ƒ ƒ‚d S )Nr   )NNr`   z<indirect object definition: object ID must be greater than 0r“   z;indirect object definition: generation must be non-negativez2indirect object definition different than expected)Úmax_nestingz(indirect object definition end not foundz$indirect object definition not foundz;indirect object reference: object ID must be greater than 0z:indirect object reference: generation must be non-negatives   Lengthz)bad or missing Length in stream dict (%r)zstream end not foundTFc             S   s   g | ]}|d kr|‘qS )s   0123456789abcdefABCDEFr   )r8   r>   r   r   r   ú
<listcomp>a  s    z'PdfParser.get_value.<locals>.<listcomp>ó   0zus-asciizunrecognized object: é    )+Ú
re_commentr  r  Úre_indirect_def_startrG   r¡   r  rH   r  Úre_indirect_def_endÚre_indirect_referenceÚre_dict_startr  Úre_stream_startÚ	TypeErrorr^   r   r@   r6   Úre_stream_endr°   rŽ   Úre_array_startÚre_array_endr…   Úre_nullÚre_trueÚre_falser  rp   ry   Úre_intÚre_realr½   Úre_string_hexrƒ   r;   r   r  r<   Úre_string_litÚget_literal_stringrv   )rz   r{   r¨   Úexpect_indirectr!  r  Úobjectr‡   r[   r\   Z
stream_lenr§   Zstream_dataZ
hex_stringr   r   r   r  ó  sÖ    

&




zPdfParser.get_valuesF   (\\[nrtbf()\\])|(\\[0-9]{1,3})|(\\(\r\n|\r|\n))|(\r\n|\r|\n)|(\()|(\))ó   nr«   ó   ró   ó   tó   	ó   bó   ó   fó   r»   c             C   s  d}t ƒ }xø| j ||¡D ]æ}| ||| ¡ … ¡ | d¡rZ| | j| d¡d  ¡ nž| d¡r„| t| d¡dd … dƒ¡ nt| d¡rnh| d¡r¦| d¡ nR| d¡rÄ| d	¡ |d7 }n4| d
¡rø|dkræt	|ƒ| 
¡ fS | d¡ |d8 }| 
¡ }qW tdƒ‚d S )Nr   r`   r“   é   r  é   r«   é   r¼   é   rº   zunfinished literal string)rƒ   Úre_lit_str_tokenr  r†   r  r  Úescaped_charsr…   r¡   rs   r  r@   )rz   r{   r¨   Znesting_depthr‡   r  r   r   r   r6  ƒ  s,    

 






zPdfParser.get_literal_strings   xrefs+   ([0-9]{10}) ([0-9]{5}) ([fn])( \r| \n|\r\n)c             C   s  d}| j  | j|| j ¡}t|dƒ | ¡ }xà| j | j|¡}|sNt|dƒ P d}| ¡ }t| d¡ƒ}t| d¡ƒ}x’t	||| ƒD ]€}| j
 | j|¡}t|dƒ | ¡ }| d¡d	k}t| d¡ƒ}	|s†t| d¡ƒ|	f}
t|| jkpö| j| |
kd
ƒ |
| j|< q†W q.W |S )NFzxref section start not foundzxref subsection start not foundTr`   r“   zxref entry not foundr  r@  z)xref entry duplicated (and not identical))Úre_xref_section_startr  r²   rÆ   rG   r  Úre_xref_subsection_startr¡   r  r‰   Úre_xref_entryrÚ   )rI   r
  Zsubsection_foundr  r¨   Zfirst_objectÚnum_objectsró   Zis_freerN   Z	new_entryr   r   r   r  §  s8    

zPdfParser.read_xref_tablec          
   C   st   | j |d  \}}t||d kd|d › d|d › d|› d|› ƒ | j| j|| j t|Ž |dd }|| j|< |S )Nr   r`   zexpected to find generation z for object ID z) in xref table, instead found generation z at offset )r7  r!  )rÚ   rG   r  r²   rÆ   rH   rÌ   )rI   rö   r!  r¨   rN   r\   r   r   r   r  È  s    
&
zPdfParser.read_indirectc             C   sn   |d kr| j }t|d dkdƒ g }xD|d D ]8}|  |¡}|d dkrT| |¡ q.| | j|d¡ q.W |S )Ns   Types   Pagesz%/Type of page tree node is not /Pagess   Kidss   Page)Únode)rÔ   rG   r  r…   r†   r  )rI   rL  rÕ   ZkidZ
kid_objectr   r   r   r  Ø  s    
zPdfParser.linearize_page_tree)NNNr   rÃ   )N)N)F)Nr   )r   )N)IrA   rB   rC   rD   rZ   rÝ   râ   rä   rã   rÞ   rÛ   rè   rê   rð   rî   rõ   rò   rï   rì   ÚstaticmethodrÊ   rÍ   rí   Ú	delimiterZdelimiter_or_wsÚ
whitespaceZwhitespace_or_hexZwhitespace_optionalZwhitespace_mandatoryZwhitespace_optional_no_nlZnewline_onlyÚnewlineÚreÚcompileÚDOTALLr  r  r  r  Zre_whitespace_optionalr  r)  r  rˆ   r  r  ry   r/  r0  r1  r2  r3  r-  r.  r4  r5  r(  r&  r'  r%  r*  r,  r  rF  r   rG  r6  rH  rI  rJ  r  r  r  r   r   r   r   rx   a  sÀ   
%
	

.*

""y
!
rx   )r¥   r   r   rþ   ræ   rQ  r¢   r¶   r	   r5   r?   rÄ   r@   rG   Ú
namedtuplerH   rT   rU   rp   rÁ   rŠ   r   rŽ   r®   r°   rŒ   rx   r   r   r   r   Ú<module>   s~   
['0