o
    yqiSU                     @   s   d dl mZmZmZmZmZ d dlZddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZmZmZm Z  ddl!m"Z" ej#G dd deZ$edddG dd deZ%dS )    )AnyDictListOptionalUnionN   )logging)pipeline_requires_extra   )ImageBatchSampler)	ReadImage)	benchmark)	HPIConfig)PaddlePredictorOption   )(AutoParallelImageSimpleInferencePipeline)BasePipeline)CropByPolysSortPolyBoxesSortQuadBoxescal_ocr_word_boxconvert_points_to_boxesrotate_image   )	OCRResultc                       s  e Zd ZdZ				d%dedee dee dedee	eee
f ef  d	df fd
dZdeej dee d	eej fddZded	efddZdee dee dee d	efddZ						d&dee dee dee dee dee dee d	efddZ											d'd e	eee ejeej f dee dee dee dee dee dee dee dee dee d!ee d"ee d	efd#d$Z  ZS )(_OCRPipelinezOCR PipelineNFconfigdevice	pp_optionuse_hpip
hpi_configreturnc           
   
      s\  t  j||||d |dd| _| jr&|di dddi}| || _|dd| _| jrB|d	i d
ddi}| || _|d	i dddi}|d | _	| j	dkr|dd| _
|dd| _|dd| _|dd| _|dd| _|dd| _|dd| _t | _tdd | _nI| j	d!kr|dd"| _
|dd#| _|dd| _|dd$| _|dd| _|dd%| _|dd| _t | _td&d | _ntd'| j	| j|| j
| j| j| j| j| j| jd(| _|d	i d)dd*i}	|	d+d,| _|	d-d.| _|	dd| _| j|	| jd/| _t|d0d1d2| _td3d4| _dS )5a  
        Initializes the class with given configurations and options.

        Args:
            config (Dict): Configuration dictionary containing various settings.
            device (str, optional): Device to run the predictions on. Defaults to None.
            pp_option (PaddlePredictorOption, optional): PaddlePredictor options. Defaults to None.
            use_hpip (bool, optional): Whether to use the high-performance
                inference plugin (HPIP) by default. Defaults to False.
            hpi_config (Optional[Union[Dict[str, Any], HPIConfig]], optional):
                The default high-performance inference configuration dictionary.
                Defaults to None.
        )r   r   r   r    use_doc_preprocessorTZSubPipelinesZDocPreprocessorZpipeline_config_errorz+config error for doc_preprocessor_pipeline!use_textline_orientationZ
SubModulesZTextLineOrientationZmodel_config_errorz,config error for textline_orientation_model!ZTextDetectionz config error for text_det_model!	text_typegenerallimit_side_leni  
limit_typemaxmax_side_limiti  threshg333333?
box_threshg333333?input_shapeNunclip_ratiog       @quad)Zdet_box_typeZseali  ming?g      ?ZpolyzUnsupported text type {})r&   r'   r)   r*   r+   r-   r,   ZTextRecognitionz config error for text_rec_model!Zscore_threshr   return_word_boxF)r,   
batch_sizer   )r1   ZBGR)format) super__init__getr"   Zcreate_pipelinedoc_preprocessor_pipeliner#   Zcreate_modeltextline_orientation_modelr$   text_det_limit_side_lentext_det_limit_typetext_det_max_side_limittext_det_threshtext_det_box_threshr,   text_det_unclip_ratior   _sort_boxesr   _crop_by_polysr   
ValueErrorr2   text_det_modeltext_rec_score_threshr0   text_rec_modelr   batch_samplerr   
img_reader)
selfr   r   r   r   r    Zdoc_preprocessor_configZtextline_orientation_configZtext_det_configZtext_rec_config	__class__ o/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/paddlex/inference/pipelines/ocr/pipeline.pyr4   +   s   


z_OCRPipeline.__init__image_array_listrotate_angle_listc           	      C   s   t |t |ksJ dt | dt | d|D ]}|dv s&J d| qg }t||D ]\}}|d }t||}|| q.|S )a  
        Rotate the given image arrays by their corresponding angles.
        0 corresponds to 0 degrees, 1 corresponds to 180 degrees.

        Args:
            image_array_list (List[np.ndarray]): A list of input image arrays to be rotated.
            rotate_angle_list (List[int]): A list of rotation indicators (0 or 1).
                                        0 means rotate by 0 degrees
                                        1 means rotate by 180 degrees

        Returns:
            List[np.ndarray]: A list of rotated image arrays.

        Raises:
            AssertionError: If any rotate_angle is not 0 or 1.
            AssertionError: If the lengths of input lists don't match.
        zLength of image_array_list (z*) must match length of rotate_angle_list ())r   r   z&rotate_angle must be 0 or 1, now it's    )lenzipr   append)	rF   rK   rL   ZangleZrotated_imagesZimage_arrayZrotate_indicatorZrotate_angleZrotated_imagerI   rI   rJ   r      s   
z_OCRPipeline.rotate_imagemodel_settingsc                 C   s<   |d r| j std dS |d r| jstd dS dS )a8  
        Check if the input parameters are valid based on the initialized models.

        Args:
            model_info_params(Dict): A dictionary containing input parameters.

        Returns:
            bool: True if all required models are initialized according to input parameters, False otherwise.
        r"   zRSet use_doc_preprocessor, but the models for doc preprocessor are not initialized.Fr#   z^Set use_textline_orientation, but the models for use_textline_orientation are not initialized.T)r"   r   errorr#   )rF   rR   rI   rI   rJ   check_model_settings_valid   s   z'_OCRPipeline.check_model_settings_validuse_doc_orientation_classifyuse_doc_unwarpingr#   c                 C   sL   |du r|du r| j }n|du s|du rd}nd}|du r | j}t||dS )a  
        Get the model settings based on the provided parameters or default values.

        Args:
            use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
            use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
            use_textline_orientation (Optional[bool]): Whether to use textline orientation.

        Returns:
            dict: A dictionary containing the model settings.
        NTF)r"   r#   )r"   r#   dict)rF   rU   rV   r#   r"   rI   rI   rJ   get_model_settings   s   z_OCRPipeline.get_model_settingsr8   r9   r:   r;   r<   r=   c                 C   sh   |du r| j }|du r| j}|du r| j}|du r| j}|du r#| j}|du r*| j}t||||||dS )a
  
        Get text detection parameters.

        If a parameter is None, its default value from the instance will be used.

        Args:
            text_det_limit_side_len (Optional[int]): The maximum side length of the text box.
            text_det_limit_type (Optional[str]): The type of limit to apply to the text box.
            text_det_max_side_limit (Optional[int]): The maximum side length of the text box.
            text_det_thresh (Optional[float]): The threshold for text detection.
            text_det_box_thresh (Optional[float]): The threshold for the bounding box.
            text_det_unclip_ratio (Optional[float]): The ratio for unclipping the text box.

        Returns:
            dict: A dictionary containing the text detection parameters.
        N)r&   r'   r*   r)   r+   r-   )r8   r9   r:   r;   r<   r=   rW   )rF   r8   r9   r:   r;   r<   r=   rI   rI   rJ   get_text_det_params   s(   z _OCRPipeline.get_text_det_paramsinputrB   r0   c           %      #   s    |||sddiV  |||||	|
du r#jdu r*jt|D ]\}}|j}d rKt	j
|||d}ndd |D }dd |D }t	j|fi }d	d |D fd
dD fddt|j|j|D }t	tt|}fdd|D }|rg }dg}|D ]}t	|| |  |  ||d t   qd rdd |D }||}ndgt| }t|D ]\}}|| }||| ||d   |d< qt|D ]\}}||| ||d    || }| }dd t D }t|dd d} fdd|D }tj|dD ]\}}|| d } |||  d< q8rUg |d< g |d< tt|D ]c}!||! d }|d krrt|d d ||! |d d \}"}#|d |" |d |# |d |d d  n	|d |d  |d  |d  |d! |d"  |d# ||!  q[q|D ].}jd$krt|d# }$|$|d%< rd&d |d D |d'< ntg |d%< t|V  qq1dS )(aQ  
        Predict OCR results based on input images or arrays with optional preprocessing steps.

        Args:
            input (Union[str, list[str], np.ndarray, list[np.ndarray]]): Input image of pdf path(s) or numpy array(s).
            use_doc_orientation_classify (Optional[bool]): Whether to use document orientation classification.
            use_doc_unwarping (Optional[bool]): Whether to use document unwarping.
            use_textline_orientation (Optional[bool]): Whether to use textline orientation prediction.
            text_det_limit_side_len (Optional[int]): Maximum side length for text detection.
            text_det_limit_type (Optional[str]): Type of limit to apply for text detection.
            text_det_max_side_limit (Optional[int]): Maximum side length for text detection.
            text_det_thresh (Optional[float]): Threshold for text detection.
            text_det_box_thresh (Optional[float]): Threshold for text detection boxes.
            text_det_unclip_ratio (Optional[float]): Ratio for unclipping text detection boxes.
            text_rec_score_thresh (Optional[float]): Score threshold for text recognition.
            return_word_box (Optional[bool]): Whether to return word boxes along with recognized texts.
        Returns:
            OCRResult: Generator yielding OCR results for each input image.
        rS   z0the input params for model settings are invalid!Nr"   )rU   rV   c                 S   s   g | ]}d |iqS Z
output_imgrI   ).0ZarrrI   rI   rJ   
<listcomp>_      z(_OCRPipeline.predict.<locals>.<listcomp>c                 S      g | ]}|d  qS r[   rI   r\   itemrI   rI   rJ   r]   a  s    c                 S   r_   )dt_polysrI   r`   rI   rI   rJ   r]   i  r^   c                    s   g | ]}  |qS rI   )r>   r`   rF   rI   rJ   r]   k  s    c                    s6   g | ]\}}}}|||| j g g g g d qS ))
input_path
page_indexdoc_preprocessor_resrb   rR   text_det_paramsr$   rB   r0   	rec_texts
rec_scores	rec_polys	vis_fonts)r$   )r\   rd   re   rf   rb   )rR   r0   rF   rg   rB   rI   rJ   r]   m  s"    
c                    s    g | ]}t  | d kr|qS )r   )rO   )r\   idx)dt_polys_listrI   rJ   r]     s     r   r#   c                 S   s   g | ]
}t |d  d qS )Z	class_idsr   )int)r\   Ztextline_angle_inforI   rI   rJ   r]     s    r   Ztextline_orientation_anglesc                 S   s.   g | ]\}}||j d  t|j d  dqS )r   r   )
sub_img_idsub_img_ratio)shapefloat)r\   Zimg_idZsub_imgrI   rI   rJ   r]     s    c                 S   s   | d S )Nrq   rI   )xrI   rI   rJ   <lambda>  s    z&_OCRPipeline.predict.<locals>.<lambda>)keyc                    s   g | ]} |d   qS )rp   rI   )r\   rt   )all_subs_of_imgrI   rJ   r]     s    )r0   rp   rec_resZ	text_wordZtext_word_regionZ	rec_scoreZrec_textrh   ri   rk   Zvis_fontrj   r%   	rec_boxesc                 S   s   g | ]}t |qS rI   )r   )r\   linerI   rI   rJ   r]     s    Ztext_word_boxes)rX   rT   rY   rB   r0   	enumeraterD   rE   Z	instanceslistr6   rA   rP   Zinput_pathsZpage_indexesrangerO   r?   extendrQ   r7   r   sortedrC   r   r$   r   nparrayr   )%rF   rZ   rU   rV   r#   r8   r9   r:   r;   r<   r=   rB   r0   _Z
batch_dataZimage_arraysZdoc_preprocessor_resultsZdoc_preprocessor_imagesZdet_resultsresultsindicesZall_subs_of_imgsZchunk_indicesrl   Zanglesiresrb   Zsub_img_info_listZsorted_subs_infoZsorted_subs_of_imgrx   rp   ZsnoZword_box_content_listZword_box_listry   rI   )rw   rm   rR   r0   rF   rg   rB   rJ   predict  s   #

	





 z_OCRPipeline.predict)NNFN)NNNNNN)NNNNNNNNNNN)__name__
__module____qualname____doc__r   r   strr   boolr   r   r   r4   r   r   Zndarrayro   r   rT   rW   rX   rs   rY   r   r   __classcell__rI   rI   rG   rJ   r   '   s    b
$
"
1	
r   Zocrzocr-core)Zaltc                   @   s$   e Zd ZdZedd Zdd ZdS )OCRPipelineZOCRc                 C   s   t S )N)r   rc   rI   rI   rJ   _pipeline_cls  s   zOCRPipeline._pipeline_clsc                 C   s   | ddS )Nr1   r   )r5   )rF   r   rI   rI   rJ   _get_batch_size  s   zOCRPipeline._get_batch_sizeN)r   r   r   entitiespropertyr   r   rI   rI   rI   rJ   r     s
    
r   )&typingr   r   r   r   r   numpyr   utilsr   Z
utils.depsr	   Zcommon.batch_samplerr   Zcommon.readerr   Zutils.benchmarkr   Z	utils.hpir   Zutils.pp_optionr   Z	_parallelr   baser   
componentsr   r   r   r   r   r   resultr   Ztime_methodsr   r   rI   rI   rI   rJ   <module>   s(       
>