o
    pi}s                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ ddlm	Z	m
Z
 ddlmZmZmZmZ dd	lmZmZ d
ZdZdd ZG dd dZG dd dZG dd dZdS )    N)IrGraph)core)quant_layers   )QuantWeightPassReplaceFakeQuantDequantPass)_get_input_name_index_get_op_input_var_names_get_output_name_index$move_persistable_var_to_global_block   )
fuse_utilsutils.pdmodelz
.pdiparamsc                 C   sP   ddl m} |jjjj| d< |jjjj| d< ||jj ||jj | |fS )Nr   )fleetColumnParallelLinearRowParallelLinear)Zpaddle.distributedr   Zmeta_parallelZparallel_layersZ	mp_layersr   r   append)layer_name_mapfake_quant_input_layersr    r   i/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/paddle/quantization/imperative/qat.pylazy_import_fleet&   s   

r   c                       sP   e Zd ZdZg ddddddddddddf fd	d
	Zdd ZdddZ  ZS )ImperativeQuantAwarezJ
    Applying quantization aware training (QAT) to the dygraph model.
    )Conv2DLinearConv2DTransposer   r   abs_maxmoving_average_abs_max   ?FNc                    sL   t    || _||||||||	|
|d
}tdi || _t|||| _dS )a  
        The constructor for ImperativeQuantAware.

        Args:
            quantizable_layer_type(list[str | layer]): List the type of
                layers that will be quantized. Default is ['Conv2D', 'Linear'].
            weight_quantize_type(str): quantization type for weights,
                which supports 'abs_max' and 'channel_wise_abs_max'.
            activation_quantize_type(str): quantization type for activations,
                which supports 'abs_max' and 'moving_average_abs_max' now.
                If using 'abs_max' mode, the quantization scale will be
                calculated dynamically each step in both training and testing
                period. If using 'moving_average_abs_max', the static
                quantization scale will be calculated during training and
                used in inference.
            weight_bits(int): quantization bit number for weights, whereas
                the bias is not quantized.
            activation_bits(int): quantization bit number for activations.
            moving_rate(float): the parameter for 'moving_average_abs_max'
                quantization.
            fuse_conv_bn(bool): Whether to fuse conv and bn, default is False.
            weight_preprocess_layer(paddle.nn.Layer, optional): A paddle
                Layer that defines how to preprocess weight before quantization.
                Using this can quickly test if user's preprocess method works
                or not. The input is non-quantized weight and function returns
                processed weight to be quantized.
                If None, the weight will be quantized directly.
                Default is None.
            act_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer
                that defines how to preprocess activation before quantization.
                Using this can quickly test if user's preprocess method works
                or not. The input is non-quantized activation and function returns
                processed activation to be quantized.
                If None, the activation will be quantized directly.
                Default is None.
            weight_quantize_layer(paddle.nn.Layer, optional): A paddle Layer that
                defines how to quantize weight.
                Using this can quickly test if user's quantization method works or not.
                In this layer, user should both define quantization method and
                dequantization method, that is, the function's input is non-quantized
                weight and returns dequantized weight.
                If None, will use quantization op defined by 'weight_quantize_type'.
                Default is None.
            act_quantize_layer(paddle.nn.Layer, optional): A paddle Layer that defines
                how to quantize activation.
                Using this can quickly test if user's quantization method works or not.
                In this layer, user should both define quantization method and
                dequantization method, that is, the function's input is non-quantized
                activation and returns dequantized activation.
                If None, will use quantization op defined by 'activation_quantize_type'.
                Default is None.
            onnx_format (bool, optional): Whether to export the quantized model
                with format of ONNX. Default is False.

        Note:
            If user sets attribute 'skip_quant' to a Layer that support dynamic
            quantization and sets it to true, the layer would not be quantized
            during training. If this attribute is not sets or the attribute is
            false, the Layer would be quantized in training.

        Examples:
            .. code-block:: python

                >>> import paddle
                >>> from paddle.static.quantization import (
                ...     ImperativeQuantAware,
                ... )
                >>> from paddle.vision.models import (
                ...     resnet,
                ... )

                >>> model = resnet.resnet50(pretrained=True)

                >>> imperative_qat = ImperativeQuantAware(
                ...     weight_quantize_type='abs_max',
                ...     activation_quantize_type='moving_average_abs_max')

                >>> # Add the fake quant logical.
                >>> # The original model will be rewrite.
                >>> # The outscale of outputs in supported layers would be calculated.
                >>> imperative_qat.quantize(model)

                >>> # Fine-tune the quantized model
                >>> # ...

                >>> # Save quant model for the inference.
                >>> imperative_qat.save_quantized_model(
                ...     layer=model,
                ...     model_path="./resnet50_qat",
                ...     input_spec=[
                ...         paddle.static.InputSpec(
                ...         shape=[None, 3, 224, 224], dtype='float32')])

            .. code-block:: python

                >>> import paddle
                >>> from paddle.static.quantization import (
                ...     ImperativeQuantAware,
                ... )

                >>> class ImperativeModel(paddle.nn.Layer):
                ...     def __init__(self):
                ...         super().__init__()
                ...         # self.linear_0 would skip the quantization.
                ...         self.linear_0 = paddle.nn.Linear(784, 400)
                ...         self.linear_0.skip_quant = True

                ...         # self.linear_1 would not skip the quantization.
                ...         self.linear_1 = paddle.nn.Linear(400, 10)
                ...         self.linear_1.skip_quant = False

                ...     def forward(self, inputs):
                ...         x = self.linear_0(inputs)
                ...         x = self.linear_1(inputs)
                ...         return x

                >>> model = ImperativeModel()
                >>> imperative_qat = ImperativeQuantAware(
                ...     weight_quantize_type='abs_max',
                ...     activation_quantize_type='moving_average_abs_max')

                >>> # Add the fake quant logical.
                >>> # The original model will be rewrite.
                >>> #
                >>> # There is only one Layer(self.linear1) would be added the
                >>> # fake quant logical.
                >>> imperative_qat.quantize(model)

                >>> # Fine-tune the quantized model
                >>> # ...

                >>> # Save quant model for the inference.
                >>> imperative_qat.save_quantized_model(
                ...    layer=model,
                ...    model_path="./imperative_model_qat")
        )
quantizable_layer_typeweight_quantize_typeactivation_quantize_typeweight_bitsactivation_bitsmoving_rateweight_preprocess_layeract_preprocess_layerweight_quantize_layeract_quantize_layerNr   )super__init__fuse_conv_bnImperativeQuantizeInputs_quantize_inputsImperativeQuantizeOutputs_quantize_outputs)selfr!   r"   r#   r$   r%   r&   r-   r'   r(   r)   r*   onnx_formatkwargs	__class__r   r   r,   9   s$    

zImperativeQuantAware.__init__c                 C   sB   t |tjjsJ d| jrt| | j| | j| |S )a  
        According to weights' and activations' quantization types,
        the model will be added some fake quant ops, such as
        fake_quantize_dequantize_moving_average_abs_max,
        fake_quantize_dequantize_abs_max and so on. At the same time,
        the out_scale value of outputs would be calculated.

        Args:
            model(paddle.nn.Layer): the model to be quantized.
        Returns:
            None

        Examples:
            .. code-block:: python

                >>> import paddle
                >>> from paddle.static.quantization import (
                ...     ImperativeQuantAware,
                ... )

                >>> class ImperativeModel(paddle.nn.Layer):
                ...     def __init__(self):
                ...         super().__init__()
                ...         # self.linear_0 would skip the quantization.
                ...         self.linear_0 = paddle.nn.Linear(784, 400)
                ...         self.linear_0.skip_quant = True

                ...         # self.linear_1 would not skip the quantization.
                ...         self.linear_1 = paddle.nn.Linear(400, 10)
                ...         self.linear_1.skip_quant = False

                ...     def forward(self, inputs):
                ...         x = self.linear_0(inputs)
                ...         x = self.linear_1(inputs)
                ...         return x

                >>> model = ImperativeModel()
                >>> imperative_qat = ImperativeQuantAware(
                ...     weight_quantize_type='abs_max',
                ...     activation_quantize_type='moving_average_abs_max')

                >>> # Add the fake quant logical.
                >>> # The original model will be rewrite.
                >>> #
                >>> # There is only one Layer(self.linear1) would be added the
                >>> # fake quant logical.
                >>> imperative_qat.quantize(model)
        2The model must be the instance of paddle.nn.Layer.)	
isinstancepaddlennLayerr-   r   r/   applyr1   )r2   modelr   r   r   quantize   s   1
zImperativeQuantAware.quantizec                 K   sH   t j  | jj|||fi | W d    d S 1 sw   Y  d S N)r9   Z	pir_utilsZ
OldIrGuardr1   save_quantized_model)r2   layerpath
input_specconfigr   r   r   r@   (  s   "z)ImperativeQuantAware.save_quantized_modelr?   )__name__
__module____qualname____doc__r,   r>   r@   __classcell__r   r   r5   r   r   4   s$     4<r   c                
       sJ   e Zd ZdZg ddddddddddf
 fdd		Zd
d Zdd Z  ZS )r.   z
    Based on the input params, add the quant_dequant computational
    logic both for activation inputs and weight inputs.
    )r   r   r   r   r   r   r    Nc              
      s<  t    ttjtj\ _ _t fdd|D  _ jD ]}t|t	s+| jv s2J | dqh d}ddh}|dkrC||v sKJ d| d||v sWJ d	| d
dd }||scJ d||skJ ddd }||swJ d||sJ d||	sJ d||
sJ d||||||||	|
d	 _
dS )zz
        The constructor for ImperativeQuantizeInputs.

        Please refer to the args of ImperativeQuantAware.
        c                 3   s(    | ]}| j v r j | n|V  qd S r?   )r   ).0rA   r2   r   r   	<genexpr>L  s    


z4ImperativeQuantizeInputs.__init__.<locals>.<genexpr>  is unsupported to be quantized.>   Zchannel_wise_lsq_weightZchannel_wise_abs_maxr   Z
lsq_weightr   r   Zlsq_actz"Unsupported weight_quantize_type: z1. It can only be abs_max or channel_wise_abs_max.z&Unsupported activation_quantize_type: z7. It can only be moving_average_abs_max or lsq_act now.c                 S   s   t | to| dko| dkS )Nr      )r8   int)bitsr   r   r   <lambda>p  s    z3ImperativeQuantizeInputs.__init__.<locals>.<lambda>z%weight_bits should be 1, 2,... or 16.z)activation_bits should be 1, 2,... or 16.c                 S   s   | d u p
t | tjjS r?   )
issubclassr9   r:   r;   )methodr   r   r   rQ   w  s   
 z%weight_preprocess should be nn.Layer.z"act_preprocess should be nn.Layer.z#weight_quantize should be nn.Layer.z act_quantize should be nn.Layer.)	r"   r#   r$   r%   r&   Zweight_pre_layerZact_pre_layerZweight_quant_layerZact_quant_layerN)r+   r,   r   r   r   r   tuple_quantizable_layer_typer8   str_kwargs)r2   r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   rA   Zquantize_typeZact_quantize_typeZ
bits_checkZlayer_checkr5   rK   r   r,   5  sf   











z!ImperativeQuantizeInputs.__init__c                 C   st   t |tjjsJ d| D ](\}}t || jr#t|dr$|jdu r$qt	||\}}| 
|}t||| qdS )a  
        Quantize the weights and activations to calculate for specific
        layers.

        Args:
            model(paddle.nn.Layer): The target model which would
                calculate the input quantization scale.

        Returns:
            None
        r7   
skip_quantTN)r8   r9   r:   r;   named_sublayersrU   hasattrrX   r   find_parent_layer_and_sub_name_get_input_quantized_layersetattr)r2   r=   name	cur_layerparent_layersub_namecur_quant_layerr   r   r   r<     s   

zImperativeQuantizeInputs.applyc                 C   sb   d }| j  D ]\}}t||rd| } nq|d us%J d|  dtj| |fi | jS )NZ	Quantizedz
The layer rM   )r   itemsr8   Z	full_namer   __dict__rW   )r2   rA   Zquant_layer_namekeyvaluer   r   r   r\     s   

z3ImperativeQuantizeInputs._get_input_quantized_layer)rE   rF   rG   rH   r,   r<   r\   rI   r   r   r5   r   r.   /  s    ^r.   c                       sT   e Zd ZdZd fdd	Zdd Zdd
dZdd Zdd Zdd Z	dd Z
  ZS )r0   z8
    Calculate the output scales for target layers.
    r    r   Fc                    s    t    || _|| _|| _dS )a4  
        The constructor for ImperativeQuantizeOutputs.

        Args:
            moving_rate(float): The decay coefficient of moving average.
                                The default value is 0.9.
            activation_bits(int, optional): quantization bit number for activation. Default is 8.
        N)r+   r,   _moving_rate_activation_bits_onnx_format)r2   r&   r%   r3   r5   r   r   r,     s   
	
z"ImperativeQuantizeOutputs.__init__c                 C   s   t |tjjsJ d| D ]:\}}d|v rq| |sqt||\}}d}t |ttj	r:t
j|| j|d}n	t
j|| j|d}t||| qdS )aB  
        Insert the `moving_average_abs_max_scale` layers to calculate the
        output scales for specific layers in the dygraph model.

        Args:
            model(paddle.nn.Layer): The target model which would be
                calculate the output quantization scale.

        Returns:
            None
        r7   Z_act_preprocessN)reduce_type)r8   r9   r:   r;   rY   _is_target_layerr   r[   rT   Zfake_quant_output_layersr   ZFakeQuantMAOutputScaleLayerrg   ZMAOutputScaleLayerr]   )r2   r=   Zcur_namer_   r`   ra   rj   rb   r   r   r   r<     s*   
zImperativeQuantizeOutputs.applyNc                    s8  t |tjjsJ d|rtjj||d tjjd|||d| d}t r.d}t  t	
 }tj }tj|}tj|}	tj|}
|
t }|
t }tjj|	|||d\ }}| js|  || tt	 jdd}| D ]}| D ]}| dkr|| q||  qv|  |    d}n;tt	 jdd}t!||| j"d	}| D ]
}d|_#|$| qt%||}| D ]
}d|_#|$| q|  d}t&  d
}|d
u rd}n|'dr|(ddd }n|}tj)|	|} fdd|D }tjj*|||| + |d |rt,  d
S d
S )a  
        Save the quantized model for the inference.

        Args:
            model (Layer): The model to be saved.
            path (str): The path prefix to save model. The format is
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or example Tensor. If None, all input variables of
                the original Layer's forward method would be the inputs of
                the saved model. Default None.
            **config (dict, optional): Other save configuration options for
                compatibility. We do not recommend using these configurations,
                they may be removed in the future. If not necessary, DO NOT use
                them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of
                the saved model. By default, all return variables of original
                Layer's forward method are kept as the output of the saved model.
                If the provided ``output_spec`` list is not all output variables,
                the saved model will be pruned according to the given
                ``output_spec`` list.

        Returns:
            None
        r7   )rC   )rA   rB   rC   FT)executormodel_filenameparams_filename)Zfor_testmoving_average_abs_max_scale)Z
quant_bitsNr=   r   .r   r   c                    s   g | ]	}   |qS r   )Zglobal_blockvar)rJ   r^   Zinfer_programr   r   
<listcomp>b  s    zBImperativeQuantizeOutputs.save_quantized_model.<locals>.<listcomp>)rl   program
clip_extrar   )-r8   r9   r:   r;   ZjitZ	to_staticsaveZin_dynamic_modeZenable_staticr   ZCPUPlaceZstaticZglobal_scopeExecutorosrB   dirnamebasenameINFER_MODEL_SUFFIXINFER_PARAMS_SUFFIXZload_inference_modelri   _gather_scalesr   ZGraphZdescZall_sub_graphsZall_op_nodesr^   Zsafe_remove_nodesZresolve_hazardZ
to_program_set_skip_quant_attrr   rh   Z	_for_testr<   r   r   endswithrsplitjoinZsave_inference_modelcloneZdisable_static)r2   r=   rB   rC   rD   Zis_dynamic_modeZplacescopeexery   rz   rm   rn   Zfeed_target_namesfetch_targetsgraphZ	sub_graphZ_opru   Ztransform_passZquant_weight_passZ
model_nameZpath_prefixZ	feed_varsr   rr   r   r@     s   






	z.ImperativeQuantizeOutputs.save_quantized_modelc                 C   s~   t |tjjs	dS | jrt |ttjrdS dS d}t|r)t |ttj	s)d}t |ttjr3d}t |tjj
jr=d}|S )zE
        Whether the layer needs to calculate output scales.
        FT)r8   r9   r:   r;   ri   rT   r   Zfake_quant_wrap_layersZis_leaf_layerZfake_quant_leaf_layersZquantZFloatFunctionalLayer)r2   rA   flagr   r   r   rk   q  s$   
z*ImperativeQuantizeOutputs._is_target_layerc                    s.   fdd} fdd}|  |  dS )z
        Get all scales from fake ops, save them into the corresponding ops
        and delete all moving_average_abs_max_scale ops.
        c            
         s   g } g t jd} jD ]}|jD ]}|j|vr| | qq| D ]I}t|D ]B}t |j|}|d urid|jv s>|jdkri|	dd }t 
|}t |}t||\}}	||t|	 d | |dd q'q!d S )Nro   Zquantize_dequantizeOutScaler   
_thresholdwith_quant_attrT)r   !fake_quantize_dequantize_op_typesblocksopstyper   r	   find_previous_opblockoutputload_variable_datafp_numpy_to_naiver   	_set_attrrV   )

target_opsZskip_opsr   opin_var_nameprevious_opZ
scale_nameZin_scaleargnameindex)rt   r   r   r   _gather_input_scale  s:   






zEImperativeQuantizeOutputs._gather_scales.<locals>._gather_input_scalec                     s4  g } j D ]}|jD ]}|jdkr| | q
q| D ]}}|dd }|dd }|j}t||}t	||}|dd }t
|}t|}|jdkrut||}	|	d uru|	\}
}||
t| d | |d| |d	d
 |D ]}||| tt D ]} | j|kr|| |< qqwqd S )Nro   Xr   ZOutr   feedr   Zout_thresholdr   T)r   r   r   r   inputr   r   r   r   Zfind_next_opsr   r   r
   r   rV   Z_rename_inputrangelenr^   rq   )r   r   r   r   Zout_var_namer   Znext_opsZout_scale_nameZ	out_scaleresr   r   Znext_opir   rt   r   r   r   _gather_output_scale  sD   






zFImperativeQuantizeOutputs._gather_scales.<locals>._gather_output_scaleNr   )r2   rt   r   r   r   r   r   r   r   r}     s   $
z(ImperativeQuantizeOutputs._gather_scalesc                 C   s@   |j D ]}|jD ]}| ||r|dd |dd qqdS )z/
        Label the skip quantized ops.
        rX   Tr   N)r   r   _is_skip_quant_opr   )r2   rt   r   r   r   r   r   r~     s   

z.ImperativeQuantizeOutputs._set_skip_quant_attrc                    s<   g d}|j |vrdS  fdd|jD }tdd |D S )z
        The input op should be skipped quantization.
        1. the type of input op should be conv2d, depthwise_conv2d or matmul
        2. the previous ops of the input op are not fake_quantize_dequantize ops
        )Zconv2dZdepthwise_conv2dmatmulZconv2d_transposeFc                    s   g | ]}t  |qS r   )r   r   )rJ   Zarg_namer   r   r   rs     s    
z?ImperativeQuantizeOutputs._is_skip_quant_op.<locals>.<listcomp>c                 s   s$    | ]}|d uo|j tjvV  qd S r?   )r   r   r   )rJ   r   r   r   r   rL     s    

z>ImperativeQuantizeOutputs._is_skip_quant_op.<locals>.<genexpr>)r   Zinput_arg_namesany)r2   r   Zin_opZtarget_op_typesZprevious_opsr   r   r   r     s   

z+ImperativeQuantizeOutputs._is_skip_quant_op)r    r   Fr?   )rE   rF   rG   rH   r,   r<   r@   rk   r}   r~   r   rI   r   r   r5   r   r0     s    
'wI
r0   )rx   r9   Zpaddle.base.frameworkr   Zpaddle.frameworkr   Zpaddle.nn.quantr   Z%static.quantization.quantization_passr   r   Zstatic.quantization.utilsr   r	   r
   r    r   r   r{   r|   r   r   r.   r0   r   r   r   r   <module>   s     | 