o
    rqi5                     @   s  d dl Z d dlmZmZmZ d dlZd dlZd dlZd dlm	Z	 d dlm
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d
dlmZ d
dlmZ eejejG dd dZdede
j
fddZejejejdG dd deZ ejejej!dG dd deZ"ejejej#dG dd deZ$ejejej%dG dd deZ&ejejej'dG dd deZ(ejejej)dG dd deZ*ejejej+dG d d! d!eZ,ejejej-dG d"d# d#eZ.dS )$    N)AnyDictUnion)ndarray)ImageImageOps)File)Preprocessors)	InputKeys)Fields)type_assert   )Preprocessor)PREPROCESSORSc                   @   sd   e Zd ZdZdddZdeeeeef f fddZd	d
 Z	e
defddZe
defddZdS )	LoadImagea  Load an image from file or url.
    Added or updated keys are "filename", "img", "img_shape",
    "ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`),
    "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1).
    Args:
        mode (str): See :ref:`PIL.Mode<https://pillow.readthedocs.io/en/stable/handbook/concepts.html#modes>`.
        backend (str): Type of loading image. Should be: cv2 or pillow. Default is pillow.
    rgbpillowc                 C   s   |  | _|| _d S )N)uppermodebackend)selfr   r    r   e/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/modelscope/preprocessors/image.py__init__   s   

zLoadImage.__init__inputc                 C   sd  t |tr
|d }n|}| jdkrVt|}||2}t|tj}| j	dkr0t
|tj| |jd |jd |jd }}}|||f}	W d   n1 sPw   Y  nC| jdkrt|}
t|
}t|}t|}|| j	}W d   n1 sw   Y  |jd |jd d	f}	ntd
| j |||	dd}t |tr| }|| |}|S )zCall functions to load image and get image meta information.
        Args:
            input (str or dict): input image path or input dict with
                a key `filename`.
        Returns:
            dict: The dict contains loaded image.
        filenamecv2RGBr   r      Nr      z/backend should be either cv2 or pillow,but got img)r   r    	img_shapeZ	img_field)
isinstancedictr   r   Z_get_storageZas_local_pathr   ZimreadZIMREAD_COLORr   cvtColorZCOLOR_BGR2RGBshapereadioBytesIOr   openr   Zexif_transposeconvertsize	TypeErrorcopyupdate)r   r   image_path_or_urlZstorageZimg_pathr    Zimg_hZimg_wZimg_cr!   bytesinfileresultsZ	input_retr   r   r   __call__#   sD   




"





zLoadImage.__call__c                 C   s   | j j d| j d}|S )Nz(mode=))	__class____name__r   )r   repr_strr   r   r   __repr__R   s   zLoadImage.__repr__returnc                 C   s   t | trtt| }|S t | tjjrt| d}|S t | tjrCt	| j
dkr3t| tj} | d d d d d d df }|S t | trZ| tjd }|rXtt|}|S tdt|  )Nr   r   9input should be either str, PIL.Image, np.array, but got )r"   strnparray
load_imagePILr   r*   r   lenr%   r   r$   COLOR_GRAY2BGRr   getr
   IMAGEr,   typer   r    r   r   r   convert_to_ndarrayV   s&   

zLoadImage.convert_to_ndarrayc                 C   s   t | trt| }|S t | tjjr| d}|S t | tjrHt| j	dkr-t
| t
j}| d d d d d d df }t|dd}|S t | tr\| tjd }|rZt|}|S tdt|  )Nr   r   r:   Zuint8r;   )r"   r<   r?   r@   r   r*   r=   r   rA   r%   r   r$   rB   Z	fromarrayastyper   rC   r
   rD   r,   rE   rF   r   r   r   convert_to_imgi   s(   


zLoadImage.convert_to_imgN)r   r   )r6   
__module____qualname____doc__r   r   r<   r   r3   r8   staticmethodr   rG   rI   r   r   r   r   r      s    
	/r   r/   r9   c                 C   s   t  }|| d S )z simple interface to load an image from file or url

    Args:
        image_path_or_url (str): image file path or http url
    r    )r   )r/   loaderr   r   r   r?   ~   s   r?   )module_namec                       sD   e Zd Zd fdd	Zeeedejdee	ef fddZ
  ZS )	"ObjectDetectionTinynasPreprocessor    c                    s   t  jdi | || _dS )a  Preprocess the image.

        What this preprocessor will do:
        1. Transpose the image matrix to make the channel the first dim.
        2. If the size_divisible is gt than 0, it will be used to pad the image.
        3. Expand an extra image dim as dim 0.

        Args:
            size_divisible (int): The number will be used as a length unit to pad the image.
                Formula: int(math.ceil(shape / size_divisible) * size_divisible)
                Default 32.
        Nr   )superr   size_divisible)r   rS   kwargsr5   r   r   r      s   
z+ObjectDetectionTinynasPreprocessor.__init__datar9   c                 C   s   | tj}|d}|j}| jdkr@ddl}| j}t|}t|	|d | | |d< t|	|d | | |d< t
|}t| tj}||ddd|jd d|jd f< t|d}d|iS )zPreprocess the image.

        Args:
            data: The input image with 3 dimensions.

        Returns:
            The processed data in dict.
            {'img': np.ndarray}

        )r   r   r   r   Nr   r   r    )rH   r=   Zfloat32Z	transposer%   rS   mathlistintceiltupleZzerosZexpand_dims)r   rV   imager%   rW   ZstrideZpad_imgr   r   r   r3      s   

&z+ObjectDetectionTinynasPreprocessor.__call__)rQ   )r6   rJ   rK   r   r   objectr=   r   r   r<   r3   __classcell__r   r   rU   r   rP      s    &rP   c                       sN   e Zd Zdef fddZeeedeeef deeef fddZ	  Z
S )%ImageColorEnhanceFinetunePreprocessor	model_dirc                       t  j|i | || _dS )zmpreprocess the data from the `model_dir` path

        Args:
            model_dir (str): model path
        NrR   r   r`   r   r`   argsrT   rU   r   r   r      s   
z.ImageColorEnhanceFinetunePreprocessor.__init__rV   r9   c                 C      |S )a  process the raw input data

        Args:
            data (tuple): [sentence1, sentence2]
                sentence1 (str): a sentence
                    Example:
                        'you are so handsome.'
                sentence2 (str): a sentence
                    Example:
                        'you are so beautiful.'
        Returns:
            Dict[str, Any]: the preprocessed data
        r   r   rV   r   r   r   r3      s   z.ImageColorEnhanceFinetunePreprocessor.__call__)r6   rJ   rK   r<   r   r   r]   r   r   r3   r^   r   r   rU   r   r_      s    
,r_   c                       D   e Zd Zdef fddZdeeef deeef fddZ  ZS )ImageDenoisePreprocessorr`   c                    :   t  j|i | || _ddlm} |ddgdg| _dS @

        Args:
            model_dir (str): model path
        r   )Filterr   target)Zreserved_keysNrR   r   r`   commonrl   _transformsr   r`   rd   rT   rl   rU   r   r   r         z!ImageDenoisePreprocessor.__init__rV   r9   c                 C      | j D ]}||}q|S zprocess the raw input data

        Args:
            data Dict[str, Any]

        Returns:
            Dict[str, Any]: the preprocessed data
        rp   r   rV   tr   r   r   r3         
	
z!ImageDenoisePreprocessor.__call__	r6   rJ   rK   r<   r   r   r   r3   r^   r   r   rU   r   rh          *rh   c                       rg   )ImageDeblurPreprocessorr`   c                    ri   rj   rn   rq   rU   r   r   r      rr   z ImageDeblurPreprocessor.__init__rV   r9   c                 C   rs   rt   ru   rv   r   r   r   r3     rx   z ImageDeblurPreprocessor.__call__ry   r   r   rU   r   r{      rz   r{   c                       rg   )$ImagePortraitEnhancementPreprocessorr`   c                    ra   rk   Nrb   rc   rU   r   r   r   !     
z-ImagePortraitEnhancementPreprocessor.__init__rV   r9   c                 C   re   rt   r   rf   r   r   r   r3   *     	z-ImagePortraitEnhancementPreprocessor.__call__ry   r   r   rU   r   r|     s    *	r|   c                       sL   e Zd Z fddZdd Zdd Zeeedee	e
f fdd	Z  ZS )
%ImageInstanceSegmentationPreprocessorc                    s   t  j|i | |dd| _|dd| _|dd| _g | _g | _ddlm	} | jdurJt
| jtr:| jg| _| jD ]}||}| j| q=| jdurjt
| jtrZ| jg| _| jD ]}||}| j| q]dS dS )zKimage instance segmentation preprocessor in the fine-tune scenario
        trainingTtrainNvalr   )build_preprocess_transform)rR   r   popr   preprocessor_train_cfgZpreprocessor_test_cfgtrain_transformstest_transformsZ9modelscope.models.cv.image_instance_segmentation.datasetsr   r"   r#   append)r   rd   rT   r   cfgZ	transformrU   r   r   r   ;  s*   





z.ImageInstanceSegmentationPreprocessor.__init__c                 C   
   d| _ d S NTr   r   r   r   r   r   Y     z+ImageInstanceSegmentationPreprocessor.trainc                 C   r   NFr   r   r   r   r   eval]  r   z*ImageInstanceSegmentationPreprocessor.evalr2   c                 C   s8   | j r| j}n| j}|D ]}||}|du r dS q|S zprocess the raw input data

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            Dict[str, Any] | None: the preprocessed data
        N)r   r   r   )r   r2   Z
transformsrw   r   r   r   r3   a  s   z.ImageInstanceSegmentationPreprocessor.__call__)r6   rJ   rK   r   r   r   r   r]   r   r<   r   r3   r^   r   r   rU   r   r   6  s     r   c                       rg   )VideoSummarizationPreprocessorr`   c                    ra   r}   rb   rc   rU   r   r   r     r~   z'VideoSummarizationPreprocessor.__init__rV   r9   c                 C   re   rt   r   rf   r   r   r   r3     r   z'VideoSummarizationPreprocessor.__call__ry   r   r   rU   r   r   {  s    *	r   c                       sB   e Zd Z fddZdd Zdd Zdeeef fdd	Z	  Z
S )
%ImageClassificationBypassPreprocessorc                    s@   t  j|i | |dd| _|dd| _|dd| _dS )zKimage classification bypass preprocessor in the fine-tune scenario
        r   Tr   Nr   )rR   r   r   r   r   Zpreprocessor_val_cfg)r   rd   rT   rU   r   r   r     s   z.ImageClassificationBypassPreprocessor.__init__c                 C   r   r   r   r   r   r   r   r     r   z+ImageClassificationBypassPreprocessor.trainc                 C   r   r   r   r   r   r   r   r     r   z*ImageClassificationBypassPreprocessor.evalr2   c                 C   s   dS r   r   )r   r2   r   r   r   r3     r   z.ImageClassificationBypassPreprocessor.__call__)r6   rJ   rK   r   r   r   r   r<   r   r3   r^   r   r   rU   r   r     s
    	r   )/r'   typingr   r   r   r   numpyr=   r@   r   r   r   Zmodelscope.fileior   Zmodelscope.metainfor	   Zmodelscope.pipeline_inputsr
   Zmodelscope.utils.constantr   Zmodelscope.utils.type_assertr   baser   Zbuilderr   Zregister_moduleZcvr?   r   r<   Z%object_detection_tinynas_preprocessorrP   Z image_color_enhance_preprocessorr_   Zimage_denoise_preprocessorrh   Zimage_deblur_preprocessorr{   Z'image_portrait_enhancement_preprocessorr|   Z(image_instance_segmentation_preprocessorr   Z video_summarization_preprocessorr   Z(image_classification_bypass_preprocessorr   r   r   r   r   <module>   sh   i
/B