o
    {qi                     @   s\   d dl mZmZ d dlmZ ddlmZmZ ddlm	Z	 G dd deZ
G dd	 d	eZd
S )   )get_subcommand_argsstr2bool)logger   )PaddleXPipelineWrapperPipelineCLISubcommandExecutor)create_config_from_structurec                       s  e Zd Z																																																																d fdd	Zedd Zdddddddddddddddddddddddddddddddd	d
ZdddddddddddddddddddddddddddddddddZddddddddddd
ddZddddddddddd
ddZ	dd Z
dd Zedd Zdd Z  ZS )PPDocTranslationNcA           C         s:   t   }B|Bd |Bd |B| _t jdi |A d S )Nselfkwargs )localscopypop_paramssuper__init__)Cr
   layout_detection_model_namelayout_detection_model_dirlayout_threshold
layout_nmslayout_unclip_ratiolayout_merge_bboxes_modechart_recognition_model_namechart_recognition_model_dirchart_recognition_batch_sizeregion_detection_model_nameregion_detection_model_dir#doc_orientation_classify_model_name"doc_orientation_classify_model_dirdoc_unwarping_model_namedoc_unwarping_model_dirtext_detection_model_nametext_detection_model_dirtext_det_limit_side_lentext_det_limit_typetext_det_threshtext_det_box_threshtext_det_unclip_ratiotextline_orientation_model_nametextline_orientation_model_dirtextline_orientation_batch_sizetext_recognition_model_nametext_recognition_model_dirtext_recognition_batch_sizetext_rec_score_threshtable_classification_model_nametable_classification_model_dir,wired_table_structure_recognition_model_name+wired_table_structure_recognition_model_dir/wireless_table_structure_recognition_model_name.wireless_table_structure_recognition_model_dir&wired_table_cells_detection_model_name%wired_table_cells_detection_model_dir)wireless_table_cells_detection_model_name(wireless_table_cells_detection_model_dir%table_orientation_classify_model_name$table_orientation_classify_model_dirseal_text_detection_model_nameseal_text_detection_model_dirseal_det_limit_side_lenseal_det_limit_typeseal_det_threshseal_det_box_threshseal_det_unclip_ratio seal_text_recognition_model_nameseal_text_recognition_model_dir seal_text_recognition_batch_sizeseal_rec_score_threshformula_recognition_model_nameformula_recognition_model_dirformula_recognition_batch_sizeuse_doc_orientation_classifyuse_doc_unwarpinguse_textline_orientationuse_seal_recognitionuse_table_recognitionuse_formula_recognitionuse_chart_recognitionuse_region_detectionchat_bot_configr   params	__class__r   m/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/paddleocr/_pipelines/pp_doctranslation.pyr      s
   
D

zPPDocTranslation.__init__c                 C      dS )NzPP-DocTranslationr   r
   r   r   rV   _paddlex_pipeline_named      z'PPDocTranslation._paddlex_pipeline_nameFT)rJ   rK   rL   rM   rN   rO   rP   rQ   r   r   r   r   r$   r%   r&   r'   r(   r/   r>   r?   r@   rA   rB   rF   #use_wired_table_cells_trans_to_html&use_wireless_table_cells_trans_to_htmluse_table_orientation_classify use_ocr_results_with_table_cellsuse_e2e_wired_table_rec_model use_e2e_wireless_table_rec_modelc          !      K   s   | j j|fi d|d|d|d|d|d|d|d|	d	|
d
|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|| S NrJ   rK   rL   rM   rN   rO   rP   rQ   r   r   r   r   r$   r%   r&   r'   r(   r/   r>   r?   r@   rA   rB   rF   r[   r\   r]   r^   r_   r`   )paddlex_pipelinevisual_predict!r
   inputrJ   rK   rL   rM   rN   rO   rP   rQ   r   r   r   r   r$   r%   r&   r'   r(   r/   r>   r?   r@   rA   rB   rF   r[   r\   r]   r^   r_   r`   r   r   r   rV   visual_predict_iterh   s   $	
z$PPDocTranslation.visual_predict_iterc          !      K   s   t | j|fi d|d|d|d|d|d|d|d|	d	|
d
|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|d|| S ra   )listrf   rd   r   r   rV   rc      s   $	
zPPDocTranslation.visual_predictzhi  g        
target_language
chunk_sizetask_descriptionoutput_format	rules_strfew_shot_demo_text_contentfew_shot_demo_key_value_listglossaryllm_request_intervalrR   c       
         K   s*   | j j|f||||||||	|
|d
|S Nri   )rb   	translater
   ori_md_info_listrj   rk   rl   rm   rn   ro   rp   rq   rr   rR   r   r   r   rV   translate_iter   s    zPPDocTranslation.translate_iterc       
         K   s,   t | j|f||||||||	|
|d
|S rs   )rg   rw   ru   r   r   rV   rt     s$   zPPDocTranslation.translatec                 C      | j |S N)rb   load_from_markdown)r
   re   r   r   rV   rz   8     z#PPDocTranslation.load_from_markdownc                 C   rx   ry   )rb   concatenate_markdown_pages)r
   Zmarkdown_listr   r   rV   r|   ;  r{   z+PPDocTranslation.concatenate_markdown_pagesc                 C   s   t  S ry   )%PPDocTranslationCLISubcommandExecutor)clsr   r   rV   get_cli_subcommand_executor>  s   z,PPDocTranslation.get_cli_subcommand_executorc                 C   s  i d| j d d| j d d| j d p| j d d| j d d| j d	 d
| j d d| j d d| j d d| j d d| j d d| j d d| j d d| j d d| j d d| j d d| j d d | j d! i d"| j d# d$| j d% d&| j d' d(| j d) d*| j d+ d,| j d- d.| j d/ d0| j d1 d2| j d3 d4| j d5 d6| j d7 d8| j d9 d:| j d; d<| j d= d>| j d? d@| j dA dB| j dC i dD| j dE dF| j dG dH| j dI dJ| j dK dL| j dM dN| j dO dP| j dQ dR| j dS dT| j dU dV| j dW dX| j dY dZ| j d[ d\| j d] d^| j d_ d`| j da db| j dc dd| j d1 i de| j d3 df| j d5 dg| j d7 dh| j d9 di| j d; dj| j d= dk| j d? dl| j dA dm| j dC dn| j dE do| j dG dp| j dI dq| j dK dr| j ds dt| j du dv| j d5 dw| j dx | j dy | j dz | j d{ | j d| | j d} | j d~ | j d | j d | j d | j d d
}t|S )NzSSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.use_doc_orientation_classifyrJ   zHSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.use_doc_unwarpingrK   z.SubPipelines.LayoutParser.use_doc_preprocessorzJSubPipelines.LayoutParser.SubPipelines.GeneralOCR.use_textline_orientationrL   z.SubPipelines.LayoutParser.use_seal_recognitionrM   z/SubPipelines.LayoutParser.use_table_recognitionrN   z1SubPipelines.LayoutParser.use_formula_recognitionrO   z/SubPipelines.LayoutParser.use_chart_recognitionrP   z.SubPipelines.LayoutParser.use_region_detectionrQ   z?SubPipelines.LayoutParser.SubModules.LayoutDetection.model_namer   z>SubPipelines.LayoutParser.SubModules.LayoutDetection.model_dirr   z>SubPipelines.LayoutParser.SubModules.LayoutDetection.thresholdr   z?SubPipelines.LayoutParser.SubModules.LayoutDetection.layout_nmsr   zHSubPipelines.LayoutParser.SubModules.LayoutDetection.layout_unclip_ratior   zMSubPipelines.LayoutParser.SubModules.LayoutDetection.layout_merge_bboxes_moder   z@SubPipelines.LayoutParser.SubModules.ChartRecognition.model_namer   z?SubPipelines.LayoutParser.SubModules.ChartRecognition.model_dirr   z@SubPipelines.LayoutParser.SubModules.ChartRecognition.batch_sizer   z?SubPipelines.LayoutParser.SubModules.RegionDetection.model_namer   z>SubPipelines.LayoutParser.SubModules.RegionDetection.model_dirr   zcSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_namer   zbSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocOrientationClassify.model_dirr   zYSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocUnwarping.model_namer    zXSubPipelines.LayoutParser.SubPipelines.DocPreprocessor.SubModules.DocUnwarping.model_dirr!   zUSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.model_namer"   zTSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.model_dirr#   zYSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.limit_side_lenr$   zUSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.limit_typer%   zQSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.threshr&   zUSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.box_threshr'   zWSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextDetection.unclip_ratior(   z[SubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.model_namer)   zZSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.model_dirr*   z[SubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.batch_sizer+   zWSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.model_namer,   zVSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.model_dirr-   zWSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.batch_sizer.   zYSubPipelines.LayoutParser.SubPipelines.GeneralOCR.SubModules.TextRecognition.score_threshr/   zaSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.TableClassification.model_namer0   z`SubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.TableClassification.model_dirr1   zlSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.WiredTableStructureRecognition.model_namer2   zkSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.WiredTableStructureRecognition.model_dirr3   zoSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.WirelessTableStructureRecognition.model_namer4   znSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.WirelessTableStructureRecognition.model_dirr5   zfSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.WiredTableCellsDetection.model_namer6   zeSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.WiredTableCellsDetection.model_dirr7   ziSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.WirelessTableCellsDetection.model_namer8   zhSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.WirelessTableCellsDetection.model_dirr9   zfSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.TableOrientationClassify.model_namer:   zeSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubModules.TableOrientationClassify.model_dirr;   zsSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextDetection.model_namezrSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextDetection.model_dirzwSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextDetection.limit_side_lenzsSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextDetection.limit_typezoSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextDetection.threshzsSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextDetection.box_threshzuSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextDetection.unclip_ratiozySubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.model_namezxSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.model_dirzySubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextLineOrientation.batch_sizezuSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextRecognition.model_nameztSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextRecognition.model_dirzuSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextRecognition.batch_sizezwSubPipelines.LayoutParser.SubPipelines.TableRecognition.SubPipelines.GeneralOCR.SubModules.TextRecognition.score_threshzoSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.model_namer<   znSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.model_dirr=   zsSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.limit_side_lenzoSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.limit_typer?   r@   rA   rB   rC   rD   rE   rG   rH   rI   rR   )
zkSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.threshzoSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.box_threshzqSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextDetection.unclip_ratiozqSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.model_namezpSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.model_dirzqSubPipelines.LayoutParser.SubPipelines.SealRecognition.SubPipelines.SealOCR.SubModules.TextRecognition.batch_sizezbSubPipelines.LayoutParser.SubPipelines.FormulaRecognition.SubModules.FormulaRecognition.model_namezaSubPipelines.LayoutParser.SubPipelines.FormulaRecognition.SubModules.FormulaRecognition.model_dirzbSubPipelines.LayoutParser.SubPipelines.FormulaRecognition.SubModules.FormulaRecognition.batch_sizezSubModules.LLM_Chat)r   r   )r
   Z	STRUCTUREr   r   rV   _get_paddlex_config_overridesB  s   #&),/258;>ADGJMPSVY\_behknqtwz}       
                "  %  (  +  .  1  4  7  :  =  @  C  F  I  L  O  lz.PPDocTranslation._get_paddlex_config_overrides)@NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN)__name__
__module____qualname__r   propertyrY   rf   rc   rw   rt   rz   r|   classmethodr   r   __classcell__r   r   rT   rV   r	      s:   K
KM#!
r	   c                   @   s(   e Zd Zedd Zdd Zdd ZdS )r}   c                 C   rW   )NZpp_doctranslationr   rX   r   r   rV   subparser_name3  rZ   z4PPDocTranslationCLISubcommandExecutor.subparser_namec                 C   s  |j ddtddd |j dtdd |j d	td
dd |j dtdd |j dtdd |j dtdd |j dtdd |j dtdd |j dtdd |j dtdd |j dtdd |j dtdd |j dtd d |j d!td"d |j d#td$d |j d%td&d |j d'td(d |j d)td*d |j d+td,d |j d-td.d |j d/td0d |j d1td2d |j d3td4d |j d5td6d |j d7td8d |j d9td:d |j d;td<d |j d=td>d |j d?td@d |j dAtdBd |j dCtdDd |j dEtdFd |j dGtdHd |j dItdJd |j dKtdLd |j dMtdNd |j dOtdPd |j dQtdNd |j dRtdSd |j dTtdUd |j dVtdWd |j dXtdYd |j dZtd[d |j d\td]d |j d^td_d |j d`tdad |j dbtdcd |j ddtded |j dftdgd |j dhtdid |j djtdkd |j dltdmd |j dntdod |j dptdqd |j drtdsd |j dttdud |j dvtdwd |j dxtdyd |j dztd{d |j d|td}d |j d~tdd |j dtdd |j dtdd |j dtdd |j dtdd d S )Nz-iz--inputTzInput path or URL.)typerequiredhelpz--save_pathzPath to the output directory.)r   r   z--target_languagerh   zTarget language.)r   defaultr   z--layout_detection_model_namez#Name of the layout detection model.z--layout_detection_model_dirz-Path to the layout detection model directory.z--layout_thresholdz/Score threshold for the layout detection model.z--layout_nmsz'Whether to use NMS in layout detection.z--layout_unclip_ratioz+Expansion coefficient for layout detection.z--layout_merge_bboxes_modez!Overlapping box filtering method.z--chart_recognition_model_namez$Name of the chart recognition model.z--chart_recognition_model_dirz.Path to the chart recognition model directory.z--chart_recognition_batch_sizez+Batch size for the chart recognition model.z--region_detection_model_namez#Name of the region detection model.z--region_detection_model_dirz-Path to the region detection model directory.z%--doc_orientation_classify_model_namez<Name of the document image orientation classification model.z$--doc_orientation_classify_model_dirzFPath to the document image orientation classification model directory.z--doc_unwarping_model_namez'Name of the text image unwarping model.z--doc_unwarping_model_dirz,Path to the image unwarping model directory.z--text_detection_model_namez!Name of the text detection model.z--text_detection_model_dirz+Path to the text detection model directory.z--text_det_limit_side_lenzUThis sets a limit on the side length of the input image for the text detection model.z--text_det_limit_typezxThis determines how the side length limit is applied to the input image before feeding it into the text deteciton model.z--text_det_threshzDetection pixel threshold for the text detection model. Pixels with scores greater than this threshold in the output probability map are considered text pixels.z--text_det_box_threshzDetection box threshold for the text detection model. A detection result is considered a text region if the average score of all pixels within the border of the result is greater than this threshold.z--text_det_unclip_ratiozText detection expansion coefficient, which expands the text region using this method. The larger the value, the larger the expansion area.z!--textline_orientation_model_namez7Name of the text line orientation classification model.z --textline_orientation_model_dirz;Path to the text line orientation classification directory.z!--textline_orientation_batch_sizez>Batch size for the text line orientation classification model.z--text_recognition_model_namez#Name of the text recognition model.z--text_recognition_model_dirz-Path to the text recognition model directory.z--text_recognition_batch_sizez*Batch size for the text recognition model.z--text_rec_score_threshzrText recognition threshold used in general OCR. Text results with scores greater than this threshold are retained.z!--table_classification_model_namez'Name of the table classification model.z --table_classification_model_dirz1Path to the table classification model directory.z.--wired_table_structure_recognition_model_namez4Name of the wired table structure recognition model.z---wired_table_structure_recognition_model_dirz>Path to the wired table structure recognition model directory.z1--wireless_table_structure_recognition_model_namez7Name of the wireless table structure recognition model.z0--wireless_table_structure_recognition_model_dirz(--wired_table_cells_detection_model_namez.Name of the wired table cells detection model.z'--wired_table_cells_detection_model_dirz8Path to the wired table cells detection model directory.z+--wireless_table_cells_detection_model_namez1Name of the wireless table cells detection model.z*--wireless_table_cells_detection_model_dirz;Path to the wireless table cells detection model directory.z --seal_text_detection_model_namez&Name of the seal text detection model.z--seal_text_detection_model_dirz0Path to the seal text detection model directory.z--seal_det_limit_side_lenzZThis sets a limit on the side length of the input image for the seal text detection model.z--seal_det_limit_typez}This determines how the side length limit is applied to the input image before feeding it into the seal text deteciton model.z--seal_det_threshzDetection pixel threshold for the seal text detection model. Pixels with scores greater than this threshold in the output probability map are considered text pixels.z--seal_det_box_threshzDetection box threshold for the seal text detection model. A detection result is considered a text region if the average score of all pixels within the border of the result is greater than this threshold.z--seal_det_unclip_ratiozSeal text detection expansion coefficient, which expands the text region using this method. The larger the value, the larger the expansion area.z"--seal_text_recognition_model_namez(Name of the seal text recognition model.z!--seal_text_recognition_model_dirz2Path to the seal text recognition model directory.z"--seal_text_recognition_batch_sizez/Batch size for the seal text recognition model.z--seal_rec_score_threshzcSeal text recognition threshold. Text results with scores greater than this threshold are retained.z --formula_recognition_model_namez&Name of the formula recognition model.z--formula_recognition_model_dirz0Path to the formula recognition model directory.z --formula_recognition_batch_sizez-Batch size for the formula recognition model.z--use_doc_orientation_classifyz9Whether to use document image orientation classification.z--use_doc_unwarpingz$Whether to use text image unwarping.z--use_textline_orientationz4Whether to use text line orientation classification.z--use_seal_recognitionz Whether to use seal recognition.z--use_table_recognitionz!Whether to use table recognition.z--use_formula_recognitionz#Whether to use formula recognition.z--use_chart_recognitionz!Whether to use chart recognition.z--use_region_detectionz Whether to use region detection.z--qianfan_api_keyz&Configuration for the embedding model.)add_argumentstrfloatr   int)r
   Z	subparserr   r   rV   _update_subparser7  s  
z7PPDocTranslationCLISubcommandExecutor._update_subparserc                 C   s   t |}|d}|d}|d}|d}|d ur&dddd|d	|d
< tdi |}td ||}g }	|D ]}
|	|
d j |rN|
d | q;td |j	|	|d}|D ]}
|

  |rj|
| q]d S )Nre   rj   	save_pathqianfan_api_keyZchat_botzernie-3.5-8kzhttps://qianfan.baidubce.com/v2Zopenai)module_nameZ
model_namebase_urlZapi_typeZapi_keyrR   zStart analyzing imagesZlayout_parsing_resultzStart translation)rj   r   )r   r   r	   r   inforf   appendmarkdownZsave_allrw   printZsave_to_markdown)r
   argsrS   re   rj   r   r   ZchatocrZresult_visualrv   resZresult_translater   r   rV   execute_with_args  s@   








z7PPDocTranslationCLISubcommandExecutor.execute_with_argsN)r   r   r   r   r   r   r   r   r   r   rV   r}   2  s    
  Yr}   N)Z
_utils.clir   r   Z_utils.loggingr   baser   r   utilsr   r	   r}   r   r   r   rV   <module>   s       