o
    + iQI                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ ddlm	Z	 ddl
mZmZmZmZ ddlmZmZmZmZmZ ddlmZ d	Zd
Ze	eejddZG dd dZdS )    N)quant_layers   )
get_logger)_get_input_name_index_get_op_input_var_names_get_op_output_var_names_get_output_name_index   )
fuse_utils
ptq_config	ptq_hooksptq_quantizerutils)PTQRegistry.pdmodelz
.pdiparamsz&%(asctime)s-%(levelname)s: %(message)s)fmtc                       s   e Zd ZdZejf fdd	ZdddZddd	Zd
d Z	dd Z
dd Zdd Zdd Zdd Zdd Zedd Zedd Z  ZS )ImperativePTQz,
    Static post training quantization.
    c                    s$   t    t|tjsJ || _dS )aQ  
        Constructor.

        Args:
            quant_config(PTQConfig): the config of post training quantization.
                The config has weight_quantizer and activation_quantizer.
                In default, the weight_quantizer is PerChannelAbsmaxQuantizer
                and the activation_quantizer is KLQuantizer.
        N)super__init__
isinstancer   Z	PTQConfig_quant_config)selfquant_config	__class__ n/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/quantization/imperative/ptq.pyr   /   s   


zImperativePTQ.__init__FNc           
      C   s   t |tjjsJ d|st|}|r|  t||}|	 D ]8\}}t
|rZt|rZ| |sZt| j}t
|rCd|_||_tj}||}	|	|_|jj|	jdd q"|S )a  
        Add quant config and hook to the target layer.

        Args:
            model(paddle.nn.Layer): The model to be quantized.
            inplace(bool): Whether apply quantization to the input model.
                           Default: False.
            fuse(bool): Whether to fuse layers.
                        Default: False.
            fuse_list(list): The layers' names to be fused. For example,
                "fuse_list = [["conv1", "bn1"], ["conv2", "bn2"]]".
                A TypeError would be raised if "fuse" was set as
                True but "fuse_list" was None.
                Default: None.
        Return
            quantized_model(paddle.nn.Layer): The quantized model.
        2The model must be the instance of paddle.nn.Layer.TF)last)r   paddlennLayercopydeepcopyevalr
   Zfuse_layersnamed_sublayersr   Zis_supported_layerr   Zis_leaf_layer_is_skip_layerr   is_simulated_quant_layerenable_in_act_quantizerr   Zquant_forward_post_hookZregister_forward_post_hookquant_hook_handleZ_forward_post_hooksmove_to_endZ_hook_id)
r   modelZinplaceZfuseZ	fuse_listnamelayerr   hookr)   r   r   r   quantize?   s6   


zImperativePTQ.quantizec                    sR  t |tjjsJ d| | tjjd|||d| d}t r)d}t  t	 }tj
 }tj
|}tj|}	tj|}
|
t }|
t }tj
j|	|||d\ }}|   |  | |   d}|du rrd}n|dr|d	d
d }n|}tj|	|} fdd|D }tj
j||||  d |rt  dS dS )a  
        1. Convert the quantized model
        2. Call jit.save to save the inference model
        3. Post process the inference model.

        Args:
            model (Layer): The model to be saved.
            path (str): The path prefix to save model. The format is
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or example Tensor. If None, all input variables of
                the original Layer's forward method would be the inputs of
                the saved model. Default None.
            **config (dict, optional): Other save configuration options for
                compatibility. We do not recommend using these configurations,
                they may be removed in the future. If not necessary, DO NOT use
                them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of
                the saved model. By default, all return variables of original
                Layer's forward method are kept as the output of the saved model.
                If the provided ``output_spec`` list is not all output variables,
                the saved model will be pruned according to the given
                ``output_spec`` list.

        Returns:
            None
        r   )r-   path
input_specFT)path_prefixexecutormodel_filenameparams_filenameNr+   r   .r	   r   c                    s   g | ]	}   |qS r   )Zglobal_blockvar).0r,   Zinfer_programr   r   
<listcomp>   s    z6ImperativePTQ.save_quantized_model.<locals>.<listcomp>)r3   programr   )r   r   r    r!   _convertZjitsaveZin_dynamic_modeZenable_staticZCPUPlaceZstaticZglobal_scopeExecutorosr0   dirnamebasenameINFER_MODEL_SUFFIXINFER_PARAMS_SUFFIXZload_inference_model	_clean_up_gather_input_thresholds_remove_scale_opendswithrsplitjoinZsave_inference_modelcloneZdisable_static)r   r+   r0   r1   configZis_dynamic_modeZplacescopeexer@   rA   r4   r5   Zfeed_target_namesZfetch_targetsZ
model_namer2   Z	feed_varsr   r9   r   save_quantized_modelo   sb   


	


z"ImperativePTQ.save_quantized_modelc                 C   sj   |  D ]\}}| |r|jj  q| | |  D ]\}}| |r-| ||j q| | dS )a  
        Convert the quantized model.

        Args:
            model(paddle.nn.Layer): The quantized model.
            inplace(bool): Whether apply conversion to the input model.
                           Default: False.
        Returns:
            None
        N)r%   _is_quant_layerr   r)   remove_cal_thresholds_save_output_thresholds_wrap_simulated_layers)r   r+   r,   	sub_layerr   r   r   r<      s   


zImperativePTQ._convertc                 C   s   t |tjjsJ dd}d}| D ]\}}| |r |d7 }q| D ]D\}}| |ri|d7 }|d dkrDtd| d| d |j}|j	rO|j
  |j  t|ri|jf}|j|| |j  q%dS )	z
        Calculate the thresholds of inputs and outputs.

        Args:
            model(paddle.nn.Layer): The quantized model.
        Returns:
            None
        8The input model must be the instance of paddle.nn.Layer.r   r	      zProcess the z / z layerN)r   r   r    r!   r%   rO   _loggerinfor   r(   in_act_quantizerZcal_thresholdsout_act_quantizerr   r'   weightwt_quantizerZsample_data)r   r+   Z	total_numZcur_numr,   rT   r   weightsr   r   r   rQ      s0   	





zImperativePTQ._cal_thresholdsc                 C   s   t |tjjsJ dt|}|j}|jj}t	|dksJ t	|dkrC|d t
d d }|||d i |d|d i dS td|d  dt	|  dS )	z
        Save the output thresholds to the layer.

        Args:
            sub_layer(paddle.nn.Layer): The quantized layer.
            quant_config(PTQConfig): the quant config for the layer.
        Returns:
            None
        rU   r	   r   
_thresholdout_thresholdzoutput_thresholds shape of z need to be 1, but received N)r   r   r    r!   r   
layer_infooutput_namesrZ   
thresholdslenstrZ_set_op_attrsrW   warning)r   rT   r   r`   ra   Zoutput_thresholdsZ	save_namer   r   r   rR   	  s   

z%ImperativePTQ._save_output_thresholdsc                 C   s  t |tjjsJ d| D ]\}}| |rt|r|j}|j	du s'J |j
}|j}d}tj D ]\}}	t ||	rCd| } nq4|dusJJ t |tjrSd}
nd}
|
d|j|jd}tj| |fi |}t|d	spJ t|jd
sxJ t|jdkrtj|jd gtjd}|jj| t|dsJ t|jd
sJ t|jdksJ |jd }t |trtj|tjd}n	tj|gtjd}|jj| | || t ||\}}t!||| qdS )z
        Replace conv2d and linear with the quantized layers, and save
        thresholds into the fake layers.
        Args:
            model(paddle.nn.Layer): The model to be quantized.
        Returns:
            None
        rU   TNZ	QuantizedZabs_maxZchannel_wise_abs_maxZmoving_average_abs_max)weight_quantize_typeZactivation_quantize_typeZweight_bitsZactivation_bits_fake_quant_input_scaler	   r   )Zdtype_fake_quant_weight)"r   r   r    r!   r%   rO   r   r'   r   r(   r\   rY   r   Zlayer_name_mapitemsr   ZAbsmaxQuantizerZ
quant_bitsr   __dict__hasattrrg   rc   rb   nparrayZfloat32rh   	set_valueri   listrR   Zfind_parent_layer_and_sub_namesetattr)r   r+   r,   rT   r   r\   rY   Zquant_layer_namekeyvaluerf   kwargsZquant_layerZinput_thresholdZweight_thresholdZparent_layerZsub_namer   r   r   rS   %  s~   	


z$ImperativePTQ._wrap_simulated_layersc                 C   s(  t |D ]}t|D ]}t |j|}|du rqd|jv s#|jdkrO|dd }t ||}t |}t	||\}}	|
|t|	 d | |
dd qt|D ]<}
|
|krZqSt||
\}}	|t|	 d }||soqS||}t	||\}}	|t|	 d }|
|| |
dd qSqqdS )	z
        Get and save input thresholds from the front ops.

        Args:
            program(Program): the input infer program.
            scope(Scope): the corresponding scope for the program.
        Returns:
            None
        Nquantize_dequantizemoving_average_abs_max_scaleZOutScaler   r^   with_quant_attrT)r   program_all_opsr   Zfind_previous_opblocktypeoutputZload_variable_dataZfp_numpy_to_naiver   	_set_attrrd   r   r   has_attrattr)r   r;   rL   opin_var_nameZprevious_op	attr_nameZin_thresholdargnameindexout_var_name	thresholdr   r   r   rE   v  s@   





z&ImperativePTQ._gather_input_thresholdsc                 C   s  dd }t |D ]v}d|jv r |jD ]}d|v r|| qq	|jdv r|jdkr,dnd}||d	 }t |j|}t|d
ksI|d	 jdkrJq	|d	 }t	||\}	}
|	t
|
 d }t	||dd	 \}	}
|	t
|
 d }||||| |||dd q	dS )z
        Remove useless thresholds which are added in jit.save.

        Args:
            program(Program): the input infer program.
        Returns:
            None
        c                 S   sn   |  |r1| |r3| |||kr5| |}| | || ||| |dd d S d S d S d S )Nrw   T)r}   r~   _remove_attrr|   )r   next_opold_attr_namenew_attr_namer   r   r   r   _helper  s   


z(ImperativePTQ._clean_up.<locals>._helperru   r^   )conv2dmatmulr   OutputOutr   r	   Zelementwise_addr_   N)r   rx   rz   Z
attr_namesr   r{   find_next_opsry   rc   r   rd   )r   r;   r   r   r   Zarg_namer   next_opsr   r   r   r   r   r   r   r   rD     s2   




zImperativePTQ._clean_upc                 C   s^   t |D ]'}|jdkr,|dd }|dd }t |j|}|D ]}||| q#qdS )z=
        Remove the moving_average_abs_max_scale op.
        rv   Xr   r   N)r   rx   rz   inputr{   r   ry   Z_rename_input)r   r;   r   r   r   r   r   r   r   r   rF     s   
zImperativePTQ._remove_scale_opc                 C   s   t | do	| jdu S )N
skip_quantT)rl   r   r-   r   r   r   r&     s   zImperativePTQ._is_skip_layerc                 C   s
   t | dS )Nr   )rl   r   r   r   r   rO     s   
zImperativePTQ._is_quant_layer)FFN)N)__name__
__module____qualname____doc__r   Zdefault_ptq_configr   r/   rN   r<   rQ   rR   rS   rE   rD   rF   staticmethodr&   rO   __classcell__r   r   r   r   r   *   s     

0^$Q-0
r   )r"   loggingr?   numpyrm   r   Zpaddle.nn.quantr   Zstatic.log_helperr   Zstatic.quantization.utilsr   r   r   r    r
   r   r   r   r   Zptq_registryr   rB   rC   r   INFOrW   r   r   r   r   r   <module>   s    