B
    5dy                 @   sf  d dl Z d dlZd dlZe jeZd dlZeje j	ed d dl
Z
d dlZd dlZd dlmZ d dlZd dlmZ d dlmZ eddZeddZedd	Zd d
lmZ d dlmZ e Zd dlmZm Z  d dl!m"Z"m#Z#m$Z$m%Z% d dl&m'Z'm(Z(m)Z) d dl*m+Z+m,Z, d dl-m.Z.m/Z/m0Z0 dddddddgZ1dgZ2dZ3ddgZ4e j5dZ6dZ7dddgZ8d Z9d!d gZ:d"d#id"d$id"d%id&d'd(d)d*d+d)d,d-d)d.d/d)d0d1d)d2d3d)d4d5d)d6d7d)d8d9d)d:d;d)d<d=d)d>d?d)d@dAd"dBiidCdAd"dDiidAdEd(d)idAd"dBiidCd"dFid"dGid"dHidIdJd(d)dKd+d)dLdMd)dNdOd)dPd-d)dQd/d)dRd1d)dSd3d)dTd5d)dUd7d)dVd9d)dWd;d)dXd=d)dYd?d)dZd[d)d\dAd"dBiidCd]d^d_d`dad)iidbdad)dcddd)dedfdgd)dhdid)dedjdkdlZ;d}dndoZ<dpdq Z=drds Z>e?dtdudvZ@dwdx ZAG dyd dejBZCG dzd de.ZDd{d| ZEdS )~    N )Path)BytesIO)Image.toolsppocrppstructure)predict_system)
get_logger)check_and_readget_image_file_list)maybe_downloaddownload_with_progressbaris_linkconfirm_model_dir_url)draw_ocrstr2bool	check_gpu)	init_argsdraw_structure_result)StructureSystemsave_structure_resto_excel	PaddleOCRPPStructurer   r   r   r   r   ZDBz2.6.1.3ZCRNNZ
SVTR_LCNetz~/.paddleocr/zPP-OCRv3zPP-OCRzPP-OCRv2zPP-StructureV2zPP-StructureurlzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tarzYhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar)chenmlzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tarz./ppocr/utils/ppocr_keys_v1.txt)r   	dict_pathzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tarz./ppocr/utils/en_dict.txtzShttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/korean_PP-OCRv3_rec_infer.tarz"./ppocr/utils/dict/korean_dict.txtzRhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/japan_PP-OCRv3_rec_infer.tarz!./ppocr/utils/dict/japan_dict.txtzXhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/chinese_cht_PP-OCRv3_rec_infer.tarz'./ppocr/utils/dict/chinese_cht_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ta_PP-OCRv3_rec_infer.tarz./ppocr/utils/dict/ta_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/te_PP-OCRv3_rec_infer.tarz./ppocr/utils/dict/te_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ka_PP-OCRv3_rec_infer.tarz./ppocr/utils/dict/ka_dict.txtzRhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tarz!./ppocr/utils/dict/latin_dict.txtzShttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_infer.tarz"./ppocr/utils/dict/arabic_dict.txtzUhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tarz$./ppocr/utils/dict/cyrillic_dict.txtzWhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_infer.tarz&./ppocr/utils/dict/devanagari_dict.txt)r   r   koreanjapanchinese_chttatekalatinarabiccyrillic
devanagarir   zRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar)detrecclszJhttps://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tarzRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tarz\https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tarz[https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar)r   r   	structurezRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tarz]https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tarzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tarz"./ppocr/utils/dict/french_dict.txtzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tarz"./ppocr/utils/dict/german_dict.txtzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tarzYhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tarz_https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tarz_https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tarz`https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tarzbhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tarzdhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tarz[https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tarzppocr/utils/dict/table_dict.txt)r   r   frenchgermanr!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r.   )zPP-OCRv3zPP-OCRv2zPP-OCRtabler   zahttps://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tarz)ppocr/utils/dict/table_structure_dict.txtzehttps://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tarzehttps://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tarz,ppocr/utils/dict/table_structure_dict_ch.txt)r   r   zahttps://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tarz6ppocr/utils/dict/layout_dict/layout_publaynet_dict.txtzfhttps://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tarz1ppocr/utils/dict/layout_dict/layout_cdla_dict.txt)r1   layout)zPP-StructurezPP-StructureV2)OCR	STRUCTURETc             C   s   dd l }t }| |_|jdtdd |jdtdd |jdtdd |jdtd	d |jd
ttddd |jdttddd x|jD ]}|j	dkrd |_
qW | r| S i }x|jD ]}|j
||j	< qW |jf |S d S )Nr   z--langr   )typedefaultz--detTz--recz--typeocrz--ocr_versionzPP-OCRv3aR  OCR Model version, the current model support list is as follows: 1. PP-OCRv3 Support Chinese and English detection and recognition model, and direction classifier model2. PP-OCRv2 Support Chinese detection and recognition model. 3. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.)r5   choicesr6   helpz--structure_versionzPP-StructureV2zModel version, the current model support list is as follows: 1. PP-Structure Support en table structure model. 2. PP-StructureV2 Support ch and en table structure model.)rec_char_dict_pathtable_char_dict_pathlayout_dict_path)argparser   add_helpadd_argumentstrr   SUPPORT_OCR_MODEL_VERSIONSUPPORT_STRUCTURE_MODEL_VERSION_actionsdestr6   
parse_args	Namespace)mMainr=   parseractionZinference_args_dict rJ   7/tmp/pip-unpacked-wheel-ndi_cy3p/paddleocr/paddleocr.pyrE   6  s8    

rE   c          *   C   sN  ddddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*g*}d+d,d-d.g}d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>g}d?d@dAdBdCdDdEdFdGdHdIdJdKg}| |krdL} n(| |krdM} n| |krdN} n| |krdO} | t dP t dQ kstdRt dP t dQ  | | dSkr"dS}n$| dTkr2dT}n| dUkrBdV}ndW}| |fS )XNafazbscscydadeesetfrgahrhuidisitZkuZlaltlvmimsmtnlnoocpiplptroZrs_latinskslsqsvswtltruzvir/   r0   arfaZugurruZrs_cyrillicbebgukmnZabqZadyZkbdZavaZdarZinhZcheZlbeZleztabhimrneZbhmaiangZbhoZmahZscknewZgomsaZbgcr'   r(   r)   r*   r3   r,   z!param lang must in {}, but got {}r   r.   )r   r'   r   r   )
MODEL_URLSDEFAULT_OCR_MODEL_VERSIONAssertionErrorformatkeys)langZ
latin_langZarabic_langZcyrillic_langZdevanagari_langdet_langrJ   rJ   rK   
parse_lang_  s8    


r   c             C   s   | dkrt }n| dkrt}ntt|  }||kr4|}||| krv||| krR|}n$td|||   t	d ||| | kr||| | kr|}n*td||| |  | t	d || | | S )Nr3   r4   z,{} models is not support, we only support {}z8lang {} is not support, we only support {} for {} models)
r   DEFAULT_STRUCTURE_MODEL_VERSIONNotImplementedErrorr   loggererrorr   r   sysexit)r5   versionZ
model_typer   ZDEFAULT_MODEL_VERSIONZ
model_urlsrJ   rJ   rK   get_model_config  s.    

r   )contentc             C   s   t j| t jd}t|tjS )N)Zdtype)np
frombufferuint8cv2imdecodeIMREAD_COLOR)r   Znp_arrrJ   rJ   rK   
img_decode  s    r   c          	   C   sh  t | trt| } t | tr8t| r4t| d d} | }t|\} }}|s|st|d}| }t|} W d Q R X | d kry|t	 }t	|}t
|}|d}	|	|d |d | }
tt|
dd}t|}t|tj}t|tj} W n   td| d S | d kr8td| d S t | tjrdt| jd	krdt| tj} | S )
Nztmp.jpgrbZRGBZjpegr   zutf-8)encodingzerror in loading image:{}   )
isinstancebytesr   r@   r   r   r   openreadr   r   convertsaveseekbase64	b64encode	b64decoder   r   r   r   r   r   r   r   r   ndarraylenshapeZcvtColorZCOLOR_GRAY2BGR)imgZ
image_fileflag_gifflag_pdffZimg_strbufimageZimZrgbZimage_bytesZdata_base64Zimage_decodeZ	img_arrayrJ   rJ   rK   	check_img  sF    








r   c                   s&   e Zd Z fddZdddZ  ZS )r   c                s  t dd}|jjf | |jtks4tdt|jt|j|_|j	sRt
tj |j| _t|j\}}td|jd|}t|jtjtdd||d \|_}td|jd|}t|jtjtdd||d \|_}td|jd	d
}	t|jtjtdd	|	d \|_}
|jdkrd|_nd|_|jsNt|j| t|j| t|j|
 |jtkrtt
dt t !d |j"t#krt
dt# t !d |j$dkrt%t&t'j(|d  |_$t
)| t* +| |j,| _,dS )zm
        paddleocr package
        args:
            **kwargs: other params show in paddleocr --help
        F)rG   z"ocr_version must in {}, but get {}r3   r+   whlr   r,   r-   r   zPP-OCRv3z
3, 48, 320z
3, 32, 320zdet_algorithm must in {}r   zrec_algorithm must in {}Nr    )-rE   __dict__updateocr_versionrA   r   r   r   use_gpushow_logr   setLevelloggingINFOuse_angle_clsr   r   r   r   det_model_dirospathjoinBASE_DIRrec_model_dirZcls_model_dirZrec_image_shapeZuse_onnxr   Zdet_algorithmSUPPORT_DET_MODELr   r   r   Zrec_algorithmSUPPORT_REC_MODELr:   r@   r   __file__parentdebugsuper__init__page_num)selfkwargsparamsr   r   det_model_configdet_urlrec_model_configrec_urlZcls_model_configZcls_url)	__class__rJ   rK   r     sX    


 



zPaddleOCR.__init__Tc             C   s  t |tjtttfstt |tr<|dkr<td t	d |dkrX| j
dkrXtd t|}t |tr| jt|ks| jdkrt|| _|d| j }n|g}|r|rg }xDt|D ]8\}}| ||\}}	}
dd t||	D }|| qW |S |rJ|sJg }x<t|D ]0\}}| |\}}d	d |D }|| qW |S g }g }xnt|D ]b\}}t |tsv|g}| j
r|r| |\}}}|s|| | |\}	}||	 q\W |s|S |S dS )
uY  
        ocr with paddleocr
        args：
            img: img for ocr, support ndarray, img_path and list or ndarray
            det: use text detection or not. If false, only rec will be exec. Default is True
            rec: use text recognition or not. If false, only det will be exec. Default is True
            cls: use angle classifier or not. Default is True. If true, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
        Tz.When input a list of images, det must be falser   FzoSince the angle classifier is not initialized, the angle classifier will not be uesd during the forward processNc             S   s   g | ]\}}|  |gqS rJ   )tolist).0boxresrJ   rJ   rK   
<listcomp>,  s   z!PaddleOCR.ocr.<locals>.<listcomp>c             S   s   g | ]}|  qS rJ   )r   )r   r   rJ   rJ   rK   r   4  s    )r   r   r   listr@   r   r   r   r   r   r   warningr   r   r   	enumerate__call__zipappendZtext_detectorZtext_classifierZtext_recognizer)r   r   r+   r,   r-   ZimgsZocr_residxZdt_boxesZrec_res_Ztmp_resZelapseZcls_resZcls_res_tmprJ   rJ   rK   r7     sT    	



zPaddleOCR.ocr)TTT)__name__
__module____qualname__r   r7   __classcell__rJ   rJ   )r   rK   r     s   <c                   s*   e Zd Z fddZd fdd	Z  ZS )r   c                s  t dd}|jjf | |jtks4tdt|jt|j|_d|_	|j
sXttj t|j\}}|dkrtd}nd}|jdkrd|_td|jd	|}t|jtjtd
d	||d \|_}td|jd|}t|jtjtd
d||d \|_}	td|jd|}
t|jtjtd
d|
d \|_}td|jd|}t|jtjtd
d|d \|_}t|j| t|j|	 t|j| t|j| |jd krtt t!j"|d  |_|j#d krtt t!j"|
d  |_#|j$d krtt t!j"|d  |_$t%| t& '| d S )NF)rG   z(structure_version must in {}, but get {}r.   r   r   zPP-Structurer3   r+   r   r   r,   r4   r1   r2   r    )(rE   r   r   Zstructure_versionrB   r   r   r   r   moder   r   r   r   r   r   r   Zmerge_no_span_structurer   r   r   r   r   r   r   r   r   Ztable_model_dirZlayout_model_dirr   r:   r@   r   r   r   r;   r<   r   r   r   )r   r   r   r   r   Z
table_langr   r   r   r   Ztable_model_configZ	table_urlZlayout_model_configZ
layout_url)r   rJ   rK   r   I  sd    



 
zPPStructure.__init__Fr   c                s"   t |}t j|||d\}}|S )N)img_idx)r   r   r   )r   r   Zreturn_ocr_result_in_tabler   r   r   )r   rJ   rK   r     s    zPPStructure.__call__)Fr   )r   r   r   r   r   r   rJ   rJ   )r   rK   r   H  s   :c        !      C   s  t dd} | j}t|r*t|d dg}n
t| j}t|dkrVtd| j d S | j	dkrnt
f | j}n| j	dkrtf | j}ntx:|D ]0}tj|dd }td	d
|d
 | j	dkr*|j|| j| j| jd}|d k	rx6tt|D ]&}|| }x|D ]}	t|	 qW qW q| j	dkrt|\}
}}|sX|sXt|}
| jr| jr|rddlm} tj| j d|}||}|!| |"  td| q|s|
d krtd| q||
gg}nrg }xlt#|
D ]`\}}tj$tj| j |dd tj| j ||d t%| d }t&|| |'||g qW g }xt#|D ]\}\}}
td|d t| tj|dd }|||d}t(|| j || | jrj|g krjddl)m*} ddl+m,} |
j-\}}}||}|||}||7 }qjW | jr|g kry ddl+m.} ||
|| j | W n8 t/k
r } ztd|| wW d d }~X Y nX x,|D ]$} | 0d | 0d t|  qW td| j  qW d S )NT)rG   ztmp.jpgr   zno images find in {}r7   r.   r   z{}{}{}z
**********)r+   r,   r-   )	Converterz{}.docxzdocx save to {}zerror in loading image:{})exist_okr   z.jpgzprocessing {}/{} page:   )r   )deepcopy)sorted_layout_boxes)convert_info_docxz.error in layout recovery image:{}, err msg: {}r   r   zresult save to {})1rE   	image_dirr   r   r   r   r   r   r   r5   r   r   r   r   r   r   basenamesplitinfor7   r+   r,   r   ranger   r   ZimreadZrecoveryZuse_pdf2docx_apiZpdf2docx.converterr   r   outputr   closer   makedirsr@   Zimwriter   r   copyr   Z$ppstructure.recovery.recovery_to_docr   r   r   	Exceptionpop)!argsr   Zimage_file_listZengineZimg_pathZimg_nameresultr   r   liner   r   r   r   Z	docx_fileZcvZ	img_pathsindexZpdf_imgZpdf_img_pathZall_resZnew_img_pathZnew_img_namer   r   hwr   Z	result_cpZresult_sortedr   exitemrJ   rJ   rK   main  s    

















r   )T)Fr   r   	importlibr   dirnamer   __dir__Zpaddler   r   r   r   Znumpyr   pathlibr   r   ior   ZPILr   import_moduler   r   r	   Ztools.inferr
   Zppocr.utils.loggingr   r   Zppocr.utils.utilityr   r   Zppocr.utils.networkr   r   r   r   Ztools.infer.utilityr   r   r   Zppstructure.utilityr   r   Zppstructure.predict_systemr   r   r   __all__r   VERSIONr   
expanduserr   r   rA   r   rB   r   rE   r   r   r   r   r   Z
TextSystemr   r   r   rJ   rJ   rJ   rK   <module>   s  








)&(wB