B
    ­6¥d‡  ã            *   @   s`  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZdd„ Zdddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-œ)Z	d.d/„ Z
G d0d1„ d1eƒZd2d3„ ZG d4d5„ d5e d6d7d8g¡ƒZG d9d:„ d:eƒZG d;d<„ d<ƒZG d=d>„ d>ƒZG d?d@„ d@eƒZG dAdB„ dBejƒZG dCdD„ dDƒZG dEdF„ dFƒZdGdH„ ZG dIdJ„ dJƒZdS )Ké    Nc             C   s   t j|  d¡ S )NÚ	utf_16_be)ÚcodecsÚBOM_UTF16_BEÚencode)Ús© r   ú1/tmp/pip-unpacked-wheel-lxgm_btu/PIL/PdfParser.pyÚencode_text   s    r	   úu   Ë˜u   Ë‡u   Ë†u   Ë™u   Ëu   Ë›u   Ëšu   Ëœu   â€¢u   â€ u   â€¡u   â€¦u   â€”u   â€“u   Æ’u   â„u   â€¹u   â€ºu   âˆ’u   â€°u   â€žu   â€œu   â€u   â€˜u   â€™u   â€šu   â„¢u   ï¬u   ï¬‚u   Åu   Å’u   Å u   Å¸u   Å½u   Ä±u   Å‚u   Å“u   Å¡u   Å¾u   â‚¬))é   é   é   é   é   é   é   é   é   é€   é   é‚   éƒ   é„   é…   é†   é‡   éˆ   é‰   éŠ   é‹   éŒ   é   éŽ   é   é   é‘   é’   é“   é”   é•   é–   é—   é˜   é™   éš   é›   éœ   é   éž   é    c             C   sH   | d t tjƒ… tjkr0| t tjƒd …  d¡S d dd„ | D ƒ¡S d S )Nr   Ú c             s   s   | ]}t  |t|ƒ¡V  qd S )N)ÚPDFDocEncodingÚgetÚchr)Ú.0Úbyter   r   r   ú	<genexpr>B   s    zdecode_text.<locals>.<genexpr>)Úlenr   r   ÚdecodeÚjoin)Úbr   r   r   Údecode_text>   s    r?   c               @   s   e Zd ZdZdS )ÚPdfFormatErrorz\An error that probably indicates a syntactic or semantic error in the
    PDF file structureN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r   r   r@   E   s   r@   c             C   s   | st |ƒ‚d S )N)r@   )Ú	conditionÚerror_messager   r   r   Úcheck_format_conditionL   s    rG   c               @   s4   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ ZdS )ÚIndirectReferencec             C   s   d|  S )Nz%s %s Rr   )Úselfr   r   r   Ú__str__T   s    zIndirectReference.__str__c             C   s   |   ¡  d¡S )Nzus-ascii)rJ   r   )rI   r   r   r   Ú	__bytes__W   s    zIndirectReference.__bytes__c             C   s$   |j | j ko"|j| jko"|j| jkS )N)Ú	__class__Ú	object_idÚ
generation)rI   Úotherr   r   r   Ú__eq__Z   s    zIndirectReference.__eq__c             C   s
   | |k S )Nr   )rI   rO   r   r   r   Ú__ne__a   s    zIndirectReference.__ne__c             C   s   t | j| jfƒS )N)ÚhashrM   rN   )rI   r   r   r   Ú__hash__d   s    zIndirectReference.__hash__N)rA   rB   rC   rJ   rK   rP   rQ   rS   r   r   r   r   rH   Q   s
   rH   ZIndirectReferenceTuplerM   rN   c               @   s   e Zd Zdd„ ZdS )ÚIndirectObjectDefc             C   s   d|  S )Nz	%s %s objr   )rI   r   r   r   rJ   i   s    zIndirectObjectDef.__str__N)rA   rB   rC   rJ   r   r   r   r   rT   h   s   rT   c               @   sL   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Z	dd„ Z
dS )Ú	XrefTablec             C   s    i | _ i | _ddi| _d| _d S )Nr   i   F)Úexisting_entriesÚnew_entriesÚdeleted_entriesÚreading_finished)rI   r   r   r   Ú__init__n   s    
zXrefTable.__init__c             C   s2   | j r|| j|< n
|| j|< || jkr.| j|= d S )N)rY   rW   rV   rX   )rI   ÚkeyÚvaluer   r   r   Ú__setitem__t   s
    

zXrefTable.__setitem__c             C   s*   y
| j | S  tk
r$   | j| S X d S )N)rW   ÚKeyErrorrV   )rI   r[   r   r   r   Ú__getitem__|   s    
zXrefTable.__getitem__c             C   sŠ   || j kr0| j | d d }| j |= || j|< nV|| jkrX| j| d d }|| j|< n.|| jkrn| j| }ndt|ƒ d }t|ƒ‚d S )Né   z
object ID z+ cannot be deleted because it doesn't exist)rW   rX   rV   ÚstrÚ
IndexError)rI   r[   rN   Úmsgr   r   r   Ú__delitem__‚   s    


zXrefTable.__delitem__c             C   s   || j kp|| jkS )N)rV   rW   )rI   r[   r   r   r   Ú__contains__’   s    zXrefTable.__contains__c             C   s.   t t| j ¡ ƒt| j ¡ ƒB t| j ¡ ƒB ƒS )N)r;   ÚsetrV   ÚkeysrW   rX   )rI   r   r   r   Ú__len__•   s    zXrefTable.__len__c             C   s*   t | j ¡ ƒt | j ¡ ƒ t | j ¡ ƒB S )N)rf   rV   rg   rX   rW   )rI   r   r   r   rg   œ   s    zXrefTable.keysc          	   C   sX  t t| j ¡ ƒt| j ¡ ƒB ƒ}t t| j ¡ ƒƒ}| ¡ }| d¡ x|rRd }xPt|ƒD ]<\}}|d ksx|d |kr~|}q\|d |… }||d … }P q\W |}d }| d|d t|ƒf ¡ x|D ]ˆ}	|	| jkrè| d| j|	  ¡ qÄ| 	d¡}
t
|	|
kd|	› d|
› ƒ y|d }W n tk
r2   d}Y nX | d|| j|	 f ¡ qÄW qHW |S )	Ns   xref
r`   s   %d %d
r   s   %010d %05d n 
z*expected the next deleted object ID to be z, instead found s   %010d %05d f 
)Úsortedrf   rW   rg   rX   ÚtellÚwriteÚ	enumerater;   ÚpoprG   rb   )rI   Úfrg   Zdeleted_keysZ	startxrefÚprevÚindexr[   Zcontiguous_keysrM   Zthis_deleted_object_idZnext_in_linked_listr   r   r   rk   ¡   s<     





zXrefTable.writeN)rA   rB   rC   rZ   r]   r_   rd   re   rh   rg   rk   r   r   r   r   rU   m   s   rU   c               @   sd   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zedd„ ƒZ	e
eddƒƒdd„ dD ƒ Zdd„ ZdS )ÚPdfNamec             C   s6   t |tƒr|j| _nt |tƒr&|| _n| d¡| _d S )Nzus-ascii)Ú
isinstancerq   ÚnameÚbytesr   )rI   rs   r   r   r   rZ   Ê   s
    


zPdfName.__init__c             C   s   | j  d¡S )Nzus-ascii)rs   r<   )rI   r   r   r   Úname_as_strÒ   s    zPdfName.name_as_strc             C   s    t |tƒr|j| jkp|| jkS )N)rr   rq   rs   )rI   rO   r   r   r   rP   Õ   s    zPdfName.__eq__c             C   s
   t | jƒS )N)rR   rs   )rI   r   r   r   rS   Ú   s    zPdfName.__hash__c             C   s   dt | jƒ› dS )NzPdfName(ú))Úreprrs   )rI   r   r   r   Ú__repr__Ý   s    zPdfName.__repr__c             C   s   | t  |¡ƒS )N)Ú	PdfParserÚinterpret_name)ÚclsÚdatar   r   r   Úfrom_pdf_streamà   s    zPdfName.from_pdf_streamé!   é   c             C   s   h | ]}t |ƒ’qS r   )Úord)r8   Úcr   r   r   ú	<setcomp>ä   s    zPdfName.<setcomp>z#%/()<>[]{}c             C   sD   t dƒ}x2| jD ](}|| jkr*| |¡ q| d| ¡ qW t|ƒS )Nó   /s   #%02X)Ú	bytearrayrs   Úallowed_charsÚappendÚextendrt   )rI   Úresultr>   r   r   r   rK   æ   s    
zPdfName.__bytes__N)rA   rB   rC   rZ   ru   rP   rS   rx   Úclassmethodr}   rf   Úranger…   rK   r   r   r   r   rq   É   s   rq   c               @   s   e Zd Zdd„ ZdS )ÚPdfArrayc             C   s   dd  dd„ | D ƒ¡ d S )Ns   [ ó    c             s   s   | ]}t |ƒV  qd S )N)Úpdf_repr)r8   Úxr   r   r   r:   ò   s    z%PdfArray.__bytes__.<locals>.<genexpr>s    ])r=   )rI   r   r   r   rK   ñ   s    zPdfArray.__bytes__N)rA   rB   rC   rK   r   r   r   r   r‹   ð   s   r‹   c               @   s$   e Zd Zdd„ Zdd„ Zdd„ ZdS )ÚPdfDictc             C   s,   |dkrt j | ||¡ n|| | d¡< d S )Nr|   zus-ascii)ÚcollectionsÚUserDictÚ__setattr__r   )rI   r[   r\   r   r   r   r’   ö   s    zPdfDict.__setattr__c          
   C   s,  y| |  d¡ }W n, tk
r> } zt|ƒ|‚W d d }~X Y nX t|tƒrRt|ƒ}| d¡r(| d¡rt|dd … }d}t|ƒdkrÀ|d }t	|dd… ƒd	 }t|ƒd
krÀ|t	|dd
… ƒ7 }dd t|ƒd … }t
 |d t|ƒd … |¡}|dkr(|d	9 }|dkr|d9 }t
 t |¡| ¡}|S )Nzus-asciiÚDatezD:é   ÚZé   é   é   é<   é   é   z%Y%m%d%H%M%S)ú+ú-rœ   éÿÿÿÿ)r   r^   ÚAttributeErrorrr   rt   r?   ÚendswithÚ
startswithr;   ÚintÚtimeÚstrptimeÚgmtimeÚcalendarÚtimegm)rI   r[   r\   ÚeZrelationshipÚoffsetÚformatr   r   r   Ú__getattr__ü   s.    



zPdfDict.__getattr__c             C   sr   t dƒ}xV|  ¡ D ]J\}}|d kr$qt|ƒ}| d¡ | tt|ƒƒ¡ | d¡ | |¡ qW | d¡ t|ƒS )Ns   <<ó   
rŒ   s   
>>)r„   Úitemsr   r‡   rt   rq   )rI   Úoutr[   r\   r   r   r   rK     s    


zPdfDict.__bytes__N)rA   rB   rC   r’   r«   rK   r   r   r   r   r   õ   s   r   c               @   s   e Zd Zdd„ Zdd„ ZdS )Ú	PdfBinaryc             C   s
   || _ d S )N)r|   )rI   r|   r   r   r   rZ   &  s    zPdfBinary.__init__c             C   s   dd  dd„ | jD ƒ¡ S )Ns   <%s>ó    c             s   s   | ]}d | V  qdS )s   %02XNr   )r8   r>   r   r   r   r:   *  s    z&PdfBinary.__bytes__.<locals>.<genexpr>)r=   r|   )rI   r   r   r   rK   )  s    zPdfBinary.__bytes__N)rA   rB   rC   rZ   rK   r   r   r   r   r¯   %  s   r¯   c               @   s   e Zd Zdd„ Zdd„ ZdS )Ú	PdfStreamc             C   s   || _ || _d S )N)Ú
dictionaryÚbuf)rI   r²   r³   r   r   r   rZ   .  s    zPdfStream.__init__c             C   sŠ   y| j j}W n tk
r"   | jS X |dkrjy| j j}W n tk
rT   | j j}Y nX tj| jt|ƒdS dt	| j jƒ› d}t
|ƒ‚d S )Ns   FlateDecode)Úbufsizezstream filter z unknown/unsupported)r²   ÚFilterrŸ   r³   ZDLÚLengthÚzlibÚ
decompressr¢   rw   ÚNotImplementedError)rI   ÚfilterZexpected_lengthrc   r   r   r   r<   2  s    zPdfStream.decodeN)rA   rB   rC   rZ   r<   r   r   r   r   r±   -  s   r±   c             C   s
  | dkrdS | dkrdS | d kr$dS t | ttttfƒr>t| ƒS t | ttfƒrZt| ƒ 	d¡S t | t
jƒr€dt
 d| ¡ 	d¡ d	 S t | tƒr–tt| ƒƒS t | tƒr¬tt| ƒƒS t | tƒrÂtt| ƒƒS t | tƒ rþ|  d
d¡} |  dd¡} |  d	d¡} d|  d	 S t| ƒS d S )NTs   trueFs   falses   nullzus-asciis   (D:z%Y%m%d%H%M%SZó   )ó   \s   \\ó   (s   \(s   \))rr   rq   r   r‹   r¯   rt   r¢   Úfloatra   r   r£   Ústruct_timeÚstrftimeÚdictÚlistr   r	   Úreplace)rŽ   r   r   r   r   B  s0    


r   c                @   sî  e Zd ZdZdrdd„Zdd„ Zd	d
„ Zdd„ Zdd„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ Zdd„ Zdsdd„Zdd„ Zdd „ Zd!d"„ Zed#d$„ ƒZd%d&„ Zdtd'd(„Zd)Zd*Zd+Zd,Zed- Zed. Zd/Zd0Zee e Ze  !ed1 e d2 e d3 e d4 e d5 e d6 e j"¡Z#e  !ed1 e d7 e d3 e d4 e d5 e e j"¡Z$d8d9„ Z%d:d;„ Z&e  !e¡Z'e  !ed< e d= ¡Z(e  !ed> ¡Z)e  !ed? e ¡Z*e+d@dA„ ƒZ,e  !dB¡Z-e+dudDdE„ƒZ.e  !edF e d= ¡Z/e  !edG e d= ¡Z0e  !edH e d= ¡Z1e  !edI e d= ¡Z2e  !edJ e d= ¡Z3e  !edK ¡Z4e  !edL ¡Z5e  !edM e dN ¡Z6e  !edO ¡Z7e  !edP e dP e dQ e d= ¡Z8e  !edP e dP e dR e d= ¡Z9e  !edS e d= ¡Z:e  !dTe dU e dV ¡Z;e  !edW ¡Z<e  !edX e d= ¡Z=e+dvdZd[„ƒZ>e  !d\¡Z?d]d^d_d`dadbdcdddedfdTdTd=d=dgdge@d]ƒd^e@d_ƒd`e@daƒdbe@dcƒdde@deƒdfe@dTƒdTe@d=ƒd=e@dgƒdgiZAe+dhdi„ ƒZBe  !edj e ¡ZCe  !ed4 e d4 e e ¡ZDe  !dk¡ZEdldm„ ZFdwdndo„ZGdxdpdq„ZHdS )yry   z|Based on
    https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
    Supports PDF up to 1.4
    Nr   Úrbc             C   s  |r|rd}t |ƒ‚|| _|| _|| _|| _d| _d| _|d k	r^|d kr^t||ƒ | _}d| _|d k	r’|  |¡ | _}d| _|s’t	|dƒr’|j
| _i | _|r¦|  ¡  nTd | _| _tƒ | _d | _tƒ | _d | _i | _g | _g | _d | _d | _i | _tƒ | _d| j_|r|  ¡  d S )Nz4specify buf or f or filename, but not both buf and fFTrs   r   )ÚRuntimeErrorÚfilenamer³   rn   Ústart_offsetÚshould_close_bufÚshould_close_fileÚopenÚget_buf_from_fileÚhasattrrs   Úcached_objectsÚread_pdf_infoÚfile_size_totalÚfile_size_thisr   ÚrootÚroot_refÚinfoÚinfo_refÚpage_tree_rootÚpagesÚ
orig_pagesÚ	pages_refÚlast_xref_section_offsetÚtrailer_dictrU   Ú
xref_tablerY   Úseek_end)rI   rÆ   rn   r³   rÇ   Úmoderc   r   r   r   rZ   e  sF    
zPdfParser.__init__c             C   s   | S )Nr   )rI   r   r   r   Ú	__enter__‹  s    zPdfParser.__enter__c             C   s   |   ¡  dS )NF)Úclose)rI   Úexc_typeÚ	exc_valueÚ	tracebackr   r   r   Ú__exit__Ž  s    zPdfParser.__exit__c             C   s   |   ¡  |  ¡  d S )N)Ú	close_bufrÜ   )rI   r   r   r   Ústart_writing’  s    zPdfParser.start_writingc             C   s.   y| j  ¡  W n tk
r"   Y nX d | _ d S )N)r³   rß   rŸ   )rI   r   r   r   rä   –  s
    zPdfParser.close_bufc             C   s2   | j r|  ¡  | jd k	r.| jr.| j ¡  d | _d S )N)rÈ   rä   rn   rÉ   rß   )rI   r   r   r   rß     s
    
zPdfParser.closec             C   s   | j  dtj¡ d S )Nr   )rn   ÚseekÚosÚSEEK_END)rI   r   r   r   rÜ   ¤  s    zPdfParser.seek_endc             C   s   | j  d¡ d S )Ns	   %PDF-1.4
)rn   rk   )rI   r   r   r   Úwrite_header§  s    zPdfParser.write_headerc             C   s   | j  d|› d ¡ ¡ d S )Nz% Ú
)rn   rk   r   )rI   r   r   r   r   Úwrite_commentª  s    zPdfParser.write_commentc             C   sl   |   ¡  |  | j ¡ ¡| _|  d¡| _|  ¡  | j| jtdƒ| jd | j| jtdƒt	| j
ƒ| j
d | jS )Nr   s   Catalog)ÚTypeZPagess   Pages)rì   ZCountZKids)Údel_rootÚnext_object_idrn   rj   rÒ   rØ   Úrewrite_pagesÚ	write_objrq   r;   rÖ   )rI   r   r   r   Úwrite_catalog­  s    
zPdfParser.write_catalogc             C   s  g }x®t | jƒD ] \}}| j| }| j|j= | |tdƒ ¡ || jkrJqi }x | ¡ D ]\}}||| 	¡ < qXW | j
|d< | jd|Ž}x(t | jƒD ]\}	}
|
|kr’|| j|	< q’W qW xB|D ]:}x4|rò| j| }|j| jkrä| j|j= | dd ¡}qÀW qºW g | _d S )Ns   ParentÚParent)N)rl   r×   rÍ   rÛ   rM   r†   rq   rÖ   r­   ru   rØ   Ú
write_pager6   )rI   Zpages_tree_nodes_to_deleteÚiZpage_refZ	page_infoZstringified_page_infor[   r\   Znew_page_refÚjZcur_page_refZpages_tree_node_refZpages_tree_noder   r   r   rï   »  s,    






zPdfParser.rewrite_pagesc             C   sš   |r|   ¡  || _| jr(|  d | j¡| _| j | j¡}t| jƒ}| j|dœ}| j	d k	r`| j	|d< | jrp| j|d< || _	| j dt
t|ƒƒ d|  ¡ d S )N)s   Roots   Sizes   Prevs   Infos   trailer
s   
startxref
%d
%%%%EOF)rí   rÒ   rÓ   rð   rÔ   rÛ   rk   rn   r;   rÙ   rt   r   )rI   Znew_root_refZ
start_xrefZnum_entriesrÚ   r   r   r   Úwrite_xref_and_trailerØ  s    



z PdfParser.write_xref_and_trailerc             O   sL   t |tƒr| j| }d|kr(tdƒ|d< d|kr:| j|d< | j|f|ž|ŽS )Nrì   s   Pagerò   )rr   r¢   rÖ   rq   rØ   rð   )rI   ÚrefÚobjsÚdict_objr   r   r   ró   ì  s    


zPdfParser.write_pagec             O   sÈ   | j }|d kr|  | ¡ ¡}n| ¡ |jf| j|j< | tt|Ž ƒ¡ | 	dd ¡}|d k	rft
|ƒ|d< |rx| t|ƒ¡ x|D ]}| t|ƒ¡ q~W |d k	rº| d¡ | |¡ | d¡ | d¡ |S )NÚstreamr¶   s   stream
s   
endstream
s   endobj
)rn   rî   rj   rN   rÛ   rM   rk   rt   rT   rm   r;   r   )rI   r÷   rø   rù   rn   rú   Úobjr   r   r   rð   õ  s$    




zPdfParser.write_objc             C   s.   | j d krd S | j| j j= | j| jd j= d S )Ns   Pages)rÒ   rÛ   rM   rÑ   )rI   r   r   r   rí   
  s    
zPdfParser.del_rootc             C   sT   t | dƒr|  ¡ S t | dƒr$|  ¡ S ytj|  ¡ dtjdS  tk
rN   dS X d S )NÚ	getbufferÚgetvaluer   )Úaccessr°   )rÌ   rü   rý   ÚmmapÚfilenoZACCESS_READÚ
ValueError)rn   r   r   r   rË     s    

zPdfParser.get_buf_from_filec             C   sü   t | jƒ| _| j| j | _|  ¡  | jd | _| j dd ¡| _	t
|  | j¡ƒ| _| j	d krdt
ƒ | _nt
|  | j	¡ƒ| _td| jkdƒ t| jd dkdƒ td| jkdƒ tt| jd tƒd	ƒ | jd | _|  | j¡| _|  | j¡| _| jd d … | _d S )
Ns   Roots   Infos   Typez/Type missing in Roots   Catalogz/Type in Root is not /Catalogs   Pagesz/Pages missing in Rootz+/Pages in Root is not an indirect reference)r;   r³   rÏ   rÇ   rÐ   Úread_trailerrÚ   rÒ   r6   rÔ   r   Úread_indirectrÑ   rÓ   rG   rr   rH   rØ   rÕ   Úlinearize_page_treerÖ   r×   )rI   r   r   r   rÎ     s(    

zPdfParser.read_pdf_infoc             C   sX   yt t| j ¡ ƒd dƒ}W n tk
r:   t ddƒ}Y nX |d k	rT|df| j|j< |S )Nr`   r   )rH   ÚmaxrÛ   rg   r  rM   )rI   r©   Ú	referencer   r   r   rî   8  s    zPdfParser.next_object_ids   [][()<>{}/%]s$   [][()<>{}/%\000\011\012\014\015\040]s   [\000\011\012\014\015\040]s#   [\000\011\012\014\015\0400-9a-fA-F]ó   *ó   +s   [\000\011\014\040]*s   [\r\n]+s   trailers   <<(.*>>)s	   startxrefs   ([0-9]+)s   %%EOFó   $s	   <<(.*?>>)c             C   sÄ   t | jƒd }|| jk r| j}| j | j|¡}t|dƒ |}x$|r`|}| j | j| ¡ d ¡}q>W |sj|}| d¡}t| d¡ƒ| _	|  
|¡| _tƒ | _| j| j	d d| jkrÀ|  | jd ¡ d S )Ni @  ztrailer end not foundé   r`   r”   )Úxref_section_offsets   Prev)r;   r³   rÇ   Úre_trailer_endÚsearchrG   ÚstartÚgroupr¢   rÙ   Úinterpret_trailerrÚ   rU   rÛ   Úread_xref_tableÚread_prev_trailer)rI   Zsearch_start_offsetÚmZ
last_matchÚtrailer_datar   r   r   r  j  s$    



zPdfParser.read_trailerc             C   sv   | j |d}| j | j||d … ¡}t|dƒ | d¡}tt| d¡ƒ|kdƒ |  |¡}d|krr|  |d ¡ d S )N)r  i @  zprevious trailer not foundr`   r”   zGxref section offset in previous trailer doesn't match what was expecteds   Prev)	r  Úre_trailer_prevr  r³   rG   r  r¢   r  r  )rI   r  Ztrailer_offsetr  r  rÚ   r   r   r   r    s    


zPdfParser.read_prev_trailers   /([!-$&'*-.0-;=?-Z\\^-z|~]+)(?=r»   s   <<s   >>c             C   sÂ   i }d}x|| j  ||¡}|sV| j ||¡}t|o>| ¡ t|ƒkdt||d … ƒ ƒ P |  | d¡¡}|  	|| ¡ ¡\}}|||< q
W td|koœt
|d tƒdƒ td|ko¸t
|d tƒdƒ |S )Nr   z+name not found in trailer, remaining data: r`   s   Sizez&/Size not in trailer or not an integers   Rootz1/Root not in trailer or not an indirect reference)Úre_nameÚmatchÚre_dict_endrG   Úendr;   rw   rz   r  Ú	get_valuerr   r¢   rH   )r{   r  Útrailerr©   r  r[   r\   r   r   r   r  ˜  s*    zPdfParser.interpret_trailers   ([^#]*)(#([0-9a-fA-F]{2}))?Fc             C   sr   d}xR| j  |¡D ]B}| d¡rF|| d¡t | d¡ d¡¡ 7 }q|| d¡7 }qW |rf| d¡S t|ƒS d S )Nr°   é   r`   zus-asciizutf-8)Úre_hashes_in_nameÚfinditerr  r„   Úfromhexr<   rt   )r{   ÚrawZas_textrs   r  r   r   r   rz   µ  s    
&
zPdfParser.interpret_names   null(?=s   true(?=s   false(?=s   ([-+]?[0-9]+)(?=s)   ([-+]?([0-9]+\.[0-9]*|[0-9]*\.[0-9]+))(?=s   \[ó   ]s   <(s   *)>s   \(s   ([-+]?[0-9]+)s   R(?=s   obj(?=s	   endobj(?=r½   s	   %[^\r\n]*s   )*s   stream\r?\ns   endstream(?=rž   c          
   C   sð  |dkrdS | j  ||¡}|r&| ¡ }| j ||¡}|rètt| d¡ƒdkdƒ tt| d¡ƒdkdƒ t|d kp’|tt| d¡ƒt| d¡ƒƒkdƒ | j|| ¡ |d d\}}|d krÄ|d fS | j	 ||¡}t|d	ƒ || ¡ fS t| d
ƒ | j
 ||¡}|r^tt| d¡ƒdkdƒ tt| d¡ƒdkdƒ tt| d¡ƒt| d¡ƒƒ| ¡ fS | j ||¡}|rÖ| ¡ }i }| j ||¡}xv|s| j|||d d\}}|d kr¾|d fS | j|||d d\}	}|	||< |d krð|d fS | j ||¡}qŽW | ¡ }| j ||¡}|rÆyt|d ƒ}
W nD tttfk
rt } zd| dd ¡ }t|ƒ|‚W d d }~X Y nX || ¡ | ¡ |
 … }| j || ¡ |
 ¡}t|dƒ | ¡ }tt|ƒ|ƒ}nt|ƒ}||fS | j ||¡}|r`| ¡ }g }| j ||¡}xN|sR| j|||d d\}	}| |	¡ |d kr@|d fS | j ||¡}qW || ¡ fS | j ||¡}|r€d | ¡ fS | j ||¡}|r d| ¡ fS | j ||¡}|rÀd| ¡ fS | j ||¡}|rðt|  | d¡¡ƒ| ¡ fS | j ||¡}|rt| d¡ƒ| ¡ fS | j  ||¡}|rDt!| d¡ƒ| ¡ fS | j" ||¡}|r¨t#dd„ | d¡D ƒƒ}t$|ƒd dkr| t%dƒ¡ t# &| 'd¡¡| ¡ fS | j( ||¡}|rÌ|  )|| ¡ ¡S dt*|||d … ƒ }t|ƒ‚d S )Nr   )NNr`   z<indirect object definition: object ID must be greater than 0r”   z;indirect object definition: generation must be non-negativez2indirect object definition different than expected)Úmax_nestingz(indirect object definition end not foundz$indirect object definition not foundz;indirect object reference: object ID must be greater than 0z:indirect object reference: generation must be non-negatives   Lengthz)bad or missing Length in stream dict (%r)zstream end not foundTFc             s   s   | ]}|d kr|V  qdS )s   0123456789abcdefABCDEFNr   )r8   r>   r   r   r   r:   a  s    z&PdfParser.get_value.<locals>.<genexpr>ó   0zus-asciizunrecognized object: é    )+Ú
re_commentr  r  Úre_indirect_def_startrG   r¢   r  rH   r  Úre_indirect_def_endÚre_indirect_referenceÚre_dict_startr  Úre_stream_startÚ	TypeErrorr^   r  r6   r@   Úre_stream_endr±   r   Úre_array_startÚre_array_endr†   Úre_nullÚre_trueÚre_falser  rq   rz   Úre_intÚre_realr¾   Úre_string_hexr„   r;   r€   r  r<   Úre_string_litÚget_literal_stringrw   )r{   r|   r©   Úexpect_indirectr"  r  Úobjectrˆ   r[   r\   Z
stream_lenr¨   rc   Zstream_dataZ
hex_stringr   r   r   r  ó  sÖ    

&





zPdfParser.get_valuesF   (\\[nrtbf()\\])|(\\[0-9]{1,3})|(\\(\r\n|\r|\n))|(\r\n|\r|\n)|(\()|(\))ó   nr¬   ó   ró   ó   tó   	ó   bó   ó   fó   r¼   c             C   s  d}t ƒ }xø| j ||¡D ]æ}| ||| ¡ … ¡ | d¡rZ| | j| d¡d  ¡ nž| d¡r„| t| d¡dd … dƒ¡ nt| d¡rnh| d¡r¦| d¡ nR| d¡rÄ| d	¡ |d7 }n4| d
¡rø|dkræt	|ƒ| 
¡ fS | d¡ |d8 }| 
¡ }qW d}t|ƒ‚d S )Nr   r`   r”   é   r  é   r¬   é   r½   é   r»   zunfinished literal string)r„   Úre_lit_str_tokenr  r‡   r  r  Úescaped_charsr†   r¢   rt   r  r@   )r{   r|   r©   Znesting_depthrˆ   r  rc   r   r   r   r6  „  s.    

 






zPdfParser.get_literal_strings   xrefs+   ([0-9]{10}) ([0-9]{5}) ([fn])( \r| \n|\r\n)c             C   s  d}| j  | j|| j ¡}t|dƒ | ¡ }xà| j | j|¡}|sNt|dƒ P d}| ¡ }t| d¡ƒ}t| d¡ƒ}x’t	||| ƒD ]€}| j
 | j|¡}t|dƒ | ¡ }| d¡d	k}t| d¡ƒ}	|s†t| d¡ƒ|	f}
t|| jkpö| j| |
kd
ƒ |
| j|< q†W q.W |S )NFzxref section start not foundzxref subsection start not foundTr`   r”   zxref entry not foundr  r@  z)xref entry duplicated (and not identical))Úre_xref_section_startr  r³   rÇ   rG   r  Úre_xref_subsection_startr¢   r  rŠ   Úre_xref_entryrÛ   )rI   r  Zsubsection_foundr  r©   Zfirst_objectÚnum_objectsrô   Zis_freerN   Z	new_entryr   r   r   r  ©  s8    

zPdfParser.read_xref_tablec          
   C   st   | j |d  \}}t||d kd|d › d|d › d|› d|› ƒ | j| j|| j t|Ž |dd }|| j|< |S )Nr   r`   zexpected to find generation z for object ID z) in xref table, instead found generation z at offset )r7  r"  )rÛ   rG   r  r³   rÇ   rH   rÍ   )rI   r÷   r"  r©   rN   r\   r   r   r   r  Ê  s    
&
zPdfParser.read_indirectc             C   sn   |d kr| j }t|d dkdƒ g }xD|d D ]8}|  |¡}|d dkrT| |¡ q.| | j|d¡ q.W |S )Ns   Types   Pagesz%/Type of page tree node is not /Pagess   Kidss   Page)Únode)rÕ   rG   r  r†   r‡   r  )rI   rL  rÖ   ZkidZ
kid_objectr   r   r   r  Ú  s    
zPdfParser.linearize_page_tree)NNNr   rÄ   )N)N)F)Nrž   )rž   )N)IrA   rB   rC   rD   rZ   rÞ   rã   rå   rä   rß   rÜ   ré   rë   rñ   rï   rö   ró   rð   rí   ÚstaticmethodrË   rÎ   rî   Ú	delimiterZdelimiter_or_wsÚ
whitespaceZwhitespace_or_hexZwhitespace_optionalZwhitespace_mandatoryZwhitespace_optional_no_nlZnewline_onlyÚnewlineÚreÚcompileÚDOTALLr  r  r  r  Zre_whitespace_optionalr  r)  r  r‰   r  r  rz   r/  r0  r1  r2  r3  r-  r.  r4  r5  r(  r&  r'  r%  r*  r,  r  rF  r€   rG  r6  rH  rI  rJ  r  r  r  r   r   r   r   ry   _  sÀ   
&
	

.*

""z
!
ry   )r¦   r   r   rÿ   rç   rQ  r£   r·   r	   r5   r?   rÅ   r@   rG   Ú
namedtuplerH   rT   rU   rq   rÂ   r‹   r‘   r   r¯   r±   r   ry   r   r   r   r   Ú<module>   s~   
\'0