o
    pi9@                    @   s  d dl Z d dlZd dlZzd dlmZ W n
   ddlmZ Y d dlZddlmZm	Z	 ddl
mZmZ ddlmZmZmZmZ ddlmZ d	d
lmZ ddlmZ ddlmZmZmZ eeejddZg dZddgZg dZg dZ dZ!dd Z"dd Z#G dd dZ$G dd dZ%G dd dZ&G dd  d Z'G d!d" d"Z(G d#d$ d$Z)G d%d& d&Z*G d'd( d(Z+G d)d* d*e$Z,G d+d, d,Z-G d-d. d.Z.G d/d0 d0Z/G d1d2 d2Z0G d3d4 d4Z1dS )5    N)tqdm      )IrGraphIrNode)_get_paddle_placecore)Programdataprogram_guardscope_guard)unique_name   )
get_logger)utils) SUPPORT_ACT_QUANTIZATION_OP_DICTSUPPORT_QUANTIZATION_OP_DICT#SUPPORT_WEIGHT_QUANTIZATION_OP_DICTz&%(asctime)s-%(levelname)s: %(message)s)fmt)fake_quantize_abs_maxfake_quantize_range_abs_max$fake_quantize_moving_average_abs_max"fake_channel_wise_quantize_abs_maxfake_dequantize_max_abs$fake_channel_wise_dequantize_max_abs)/fake_quantize_dequantize_moving_average_abs_maxZ-fake_channel_wise_quantize_dequantize_abs_maxZ fake_quantize_dequantize_abs_max)conv2ddepthwise_conv2dconv2d_transposegMbP?c                 C   sV   t |tjs
J d|d usJ d|d usJ d||   }||| d S )Nz(The type of value should be numpy array.The scope cannot be set None.The place cannot be set None.)
isinstancenpZndarrayvarname
get_tensorset)var_nodevaluescopeplacetensor r,   s/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/paddle/static/quantization/quantization_pass.py_init_var_nodeB   s   r.   c                 C   s4   d}t |D ]}| |j|}|o|  }q|S )zI
    Analyse the real inputs of the op node are all not persistable.
    T)r   _get_op_input_var_names_find_node_by_nameinputspersistable)graphop_nodeZis_input_all_not_persistablevar_namein_noder,   r,   r-   _is_input_all_not_persistableL   s   r7   c                   @   s   e Zd ZdZdddddddddgg ddddddddfd	d
Zdd Zdd Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* ZdS )+QuantizationTransformPasszk
    Quantize the ops that have weights. Add quant and dequant ops for
    the quantized ops's inputs.
    N   abs_max'  ?
skip_quantr   r   mulc                 C   s  || _ t|| _|| _|| _|	| _|| _|| _|| _|| _	|| _
|| _g d}|dks/J d||vr;td| d||vrGtd| d|| _|| _|| _|| _|
| _| jD ]}|tt v siJ |d qYd	d
 | jD | _|| _d| _i | _i | _dS )a  
        Constructor.

        Args:
            scope(static.Scope): When activation use 'range_abs_max' as the quantize
                type, this pass will create some new parameters. The scope is used to
                initialize these new parameters.
            place(static.CPUPlace|static.CUDAPlace|str): place is used to initialize new
                parameters described above. If it's string, It can be ``cpu``, and ``gpu:x``,
                where ``x`` is the index of the GPUs.
            weight_bits(int): quantization bit number for weights,
                the bias is not quantized.
            activation_bits(int): quantization bit number for activation.
            activation_quantize_type(str): quantization type for activation,
                now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
                If use 'abs_max' mode, the quantization scale will be calculated
                dynamically each step in both training and testing period. If use
                'range_abs_max', a static quantization scale will be calculated
                during training and used in inference.
            weight_quantize_type(str): quantization type for weights,
                support 'abs_max' and 'channel_wise_abs_max'. The 'range_abs_max'
                usually is not used for weight, since weights are fixed once the
                model is well trained.
            window_size(int): the window size for 'range_abs_max' quantization.
            moving_rate(float): the param for 'moving_average_abs_max' quantization.
            skip_pattern(str or str list): The user-defined quantization skip pattern, which
                will be presented in the name scope of an op. When the skip pattern is
                detected in an op's name scope, the corresponding op will not be quantized.
            quantizable_op_type(list[str]): List the type of ops that will be quantized.
                Default is ["conv2d", "depthwise_conv2d", "mul"]. The quantizable_op_type in
                QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
            weight_quantize_func(function): Function that defines how to quantize weight.
                Using this can quickly test if user's quantization method works or not.
                In this function, user should both define quantization function and
                dequantization function, that is, the function's input is non-quantized
                weight and function returns dequantized weight. If None, will use
                quantization op defined by 'weight_quantize_type'. Default is None.
            act_quantize_func(function): Function that defines how to quantize activation.
                Using this can quickly test if user's quantization method works or not.
                In this function, user should both define quantization and dequantization
                process, that is, the function's input is non-quantized activation and
                function returns dequantized activation. If None, will use quantization
                op defined by 'activation_quantize_type'. Default is None.
            weight_preprocess_func(function): Function that defines how to preprocess
                weight before quantization. Using this can quickly test if user's preprocess
                method works or not. The function's input is non-quantized weight and
                function returns processed weight to be quantized. If None, the weight will
                be quantized directly. Default is None.
            act_preprocess_func(function): Function that defines how to preprocess
                activation before quantization. Using this can quickly test if user's
                preprocess method works or not. The function's input is non-quantized
                activation and function returns processed activation to be quantized.
                If None, the activation will be quantized directly. Default is None.
            optimizer_func(function): Function return a optimizer. When 'is_test' is
                False and user want to use self-defined quantization function and
                preprocess function, this function must be set. Default is None.
            executor(base.Executor): If user want to use self-defined quantization
                function and preprocess function, executor must be set for initialization.
                Default is None.


        Examples:
            .. code-block:: python

                >>> # The original graph will be rewrite.
                >>> import paddle.static as static
                >>> from paddle.static.quantization import QuantizationTransformPass
                >>> from paddle.base.framework import IrGraph
                >>> from paddle.framework import core

                >>> graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
                >>> place = paddle.CPUPlace()
                >>> transform_pass = QuantizationTransformPass(static.global_scope(), place)
                >>> transform_pass.apply(graph)
        r:   channel_wise_abs_maxrange_abs_maxmoving_average_abs_maxrA   IThe activation quantization type does not support 'channel_wise_abs_max'.$Unknown activation_quantize_type : 'K'. It can only be 'abs_max' or 'range_abs_max' or 'moving_average_abs_max'.Unknown weight_quantize_type: 'e'. It can only be 'abs_max' or 'channel_wise_abs_max' or 'range_abs_max' or 'moving_average_abs_max'.# is not supported for quantization.c                 S      g | ]}| d qS Z_gradr,   .0opr,   r,   r-   
<listcomp>       
z6QuantizationTransformPass.__init__.<locals>.<listcomp>N)_scoper   _place_weight_bits_activation_bits_skip_pattern_weight_quantize_func_act_quantize_func_weight_preprocess_func_act_preprocess_func
_optimizer_exe
ValueError_activation_quantize_type_weight_quantize_type_window_size_moving_rate_quantizable_opslistr   keys_quantizable_grad_ops_is_test_global_stepcreate_var_mapcreate_op_mapselfr)   r*   weight_bitsactivation_bitsZactivation_quantize_typeweight_quantize_typewindow_sizemoving_rateskip_patternquantizable_op_typeZweight_quantize_funcZact_quantize_funcZweight_preprocess_funcZact_preprocess_funcZoptimizer_funcexecutoris_test
quant_typerN   r,   r,   r-   __init___   sN   _





z"QuantizationTransformPass.__init__c           	         sl  t |ts	J djdu r| _t  dd | D g fdd} fdd} fd	d
}fdd}jsE| | }|D ]}|	 j
v s[|	 jv r_|| qKi |_tt|ddd'}|D ]}|	 j
v r||s||r||| |  qoW d   n1 sw   Y  |D ]}|	 jv r||r||| q|  |S )4  
        Quantize the graph for training process. According to weight and
        activation quantization type, the graph will be added some fake
        quantize operators and fake dequantize operators.

        Args:
            graph(IrGraph): the applied graph.
        Returns:
            None
        &graph must be the instance of IrGraph.Nc                 S      g | ]}|  qS r,   r$   rM   pr,   r,   r-   rO         z3QuantizationTransformPass.apply.<locals>.<listcomp>c                    s   d}t jtr  dot fddjD }nt jtr6  do5  djdk}|rJ  	dd   	dd d S d S )	NFop_namescopec                 3   "    | ]}|   d v V  qdS r}   NrN   attrrM   patternr4   r,   r-   	<genexpr>	  
    
zMQuantizationTransformPass.apply.<locals>._quant_preprocess.<locals>.<genexpr>r=   Twith_quant_attr
r!   rU   rb   rN   has_attranystrr   find	_set_attr)r4   user_skippedrj   r   r-   _quant_preprocess  s"   
z:QuantizationTransformPass.apply.<locals>._quant_preprocessc              	      s  |  dd |  dd |  d}|j}|D ]}| | vr'q|  v r4 |  }n| }|v r=q| v rEdnd}|rXjd urX| j||}n|shjd urh| j||}|r~j	d ur~| j	||}
| q|sjd ur| j||}
| q| v rjnj}	|rjnj}
|
dkr| }|dko|  d	}|r|d
 n|}|tjv rdnd}| |||	||\}}| ||g|	g||}n| |||	|
|\}}| |||	|}| |< | ||| qd S )Nquantization_typeqat_with_weightr   Top_roleFrA   	matmul_v2trans_y_trans_yr   r   )rN   r   r   r1   r$   input_arg_namesrX   _insert_funcrY   rV   appendrW   rS   rT   r^   r]   r   _channelwise_quant_axis1_ops_insert_channel_quant_op_insert_channel_dequant_op_insert_quant_op_insert_dequant_opupdate_input_link)r3   rN   r   r1   r'   dequant_var_noder$   	is_weighttarget_out_node
quant_bitsrt   op_typer   
quant_axisquant_var_nodescale_var_nodedequantized_varspersistable_varsprocessed_varsrj   r,   r-   _transform_forward  s   








		z;QuantizationTransformPass.apply.<locals>._transform_forwardc                    sH   |j D ]}| | vrq|  v r! |  }| ||| qd S N)r1   r$   r   r   )r3   rN   r'   r   )r   r,   r-   _transform_backward  s   
z<QuantizationTransformPass.apply.<locals>._transform_backwardc                    s>   d}| j D ]}| |  vrq| }|  v rd}q|S NFT)r1   r$   r   )rN   
has_weightr'   r$   )r   r,   r-   _has_weight  s   
z4QuantizationTransformPass.apply.<locals>._has_weight7Adding quant op with weight:|{bar}| {n_fmt}/{total_fmt}P   totalZ
bar_formatZncols)r!   r   re   rs   collectionsOrderedDictall_persistable_nodes_create_global_stepall_op_nodesr$   ra   rd   out_node_mapping_tabler   len_is_skip_quantupdateresolve_hazard)	rj   r3   r   r   r   r   opsrN   tr,   r   r-   apply   sP   

q




zQuantizationTransformPass.applyc                 C   s   | j dks
| jdkrkd}| D ]}| |kr|| _q| jd u rm|j|tjjj	dgtjjj
d}t|tjdgdd| j| j || }|jddtjjjd	d
|id|id}||| ||| || _d S d S d S )NrB   z@STEP_COUNTER@r   r$   var_typeshape	var_dtypeZint64dtype	incrementg      ?)stepr   XOutr   attrsr1   outputs)r^   r]   all_var_nodesr$   rf   create_persistable_noder   VarDescVarTypeDENSE_TENSORZINT64r.   r"   zerosrQ   rR   create_var_node_from_descr#   create_op_nodeop_proto_and_checker_makerOpRoleForwardlink_to)rj   r3   Zcounter_namenodeZglobal_step_inZglobal_step_outZincrement_opr,   r,   r-   r     sH   


	
z-QuantizationTransformPass._create_global_stepc                 C   sR   |dkr|  |||||S |dkr| |||||S |dkr'| |||||S dS )z7
        Insert fake_quantize_op in the graph.
        r:   rB   rC   N)_insert_quant_abs_max_op_insert_quant_range_abs_max_op'_insert_quant_moving_average_abs_max_op)rj   r3   r'   r$   r   rt   r   r,   r,   r-   r     s   


z*QuantizationTransformPass._insert_quant_opc                 C   s$  |  sJ |  d|j| || | | d}| |}| tj	kr.d}n| tj
kr8d}nd}zt| j| }	W n   tjdg|d}	Y |j|| dg| d}
t|
|	| j| j |jd||d	d
|i||
dd}||| ||| |||
 ||
fS )z?
        Insert fake_quantize_abs_max op in the graph.
         is not a varr   float64float32float16r   r   r   )
bit_lengthr   r   r   OutScaler   )is_varr$   create_var_node_quantized_var_nametyper   r   _quantized_scale_namepaddler   r   r"   arrayrQ   find_varr%   r   r   r.   rR   r   r   )rj   r3   r'   r$   r   r   r   
scale_name	data_typescale_valuer   quant_op_noder,   r,   r-   r     sH   
z2QuantizationTransformPass._insert_quant_abs_max_opc                 C   s   |  sJ |  d|j| || | | d}| |}| tj	kr.d}n| tj
kr8d}nd}zt| j| }	W n   tjtg|d}	Y |j|tjjjdg| d}
t|
|	| j| j ||
 }||
d}||d	}| js|jtd
tjjj| jg| d}| tj	krd}n| tj
krd}nd}t|tj| jg|d| j| j | j|d< ||d< | j|| j|d}|j d|||d}|!|| |!|
| |!|| |!|| | js|!| j| |!|| ||fS )zB
        Insert fake_quantize_range_abs_max on the graph.
        r   r   r   r   r   r   r   r   ZInScaler   scalesZIterZ	OutScales)rn   r   rs   r   r   r   )"r   r$   r   r   r   r   r   r   r   r   r   r"   r   rQ   r   r%   _SCALE_DEFAULT_VALUEr   r   r   r   r   r.   rR   r   r#   re   r   generater_   r   rf   r   r   )rj   r3   r'   r$   r   r   r   r   r   r   scale_in_nodescale_out_noder1   r   Zscales_noder   r   r,   r,   r-   r   %  s   



z8QuantizationTransformPass._insert_quant_range_abs_max_opc                 C   sf  |j | || | | d}| |}| tjkr!d}n| tjkr+d}nd}zt	
| j| }	W n   t	j
tg|d}	Y |j|tjjjdg| d}
t|
|	| j| j ||
 }||
d}||d}| js|jtd	tjjj| dgd
}| tjkrd}n| tjkrd}nd}t|t	jdg|d| j| j |jtdtjjj| dgd
}t|t	jdg|d| j| j || }|| }||d< ||d< ||d< ||d< || j| j|d}|jd|||d}||| ||
| ||| ||| | js/||| ||| ||| ||| ||fS )z+Insert fake_quantize_moving_average_abs_maxr   r   r   r   r   r   r   r   stater$   r   r   r   accumInStateInAccumOutStateOutAccumr   ro   rs   r   r   r   )r   r   r   r   r   r   r   r   r   r"   r   rQ   r   r%   r   r   r   r   r   r   r.   rR   r   r#   re   r   r   onesr`   r   r   )rj   r3   r'   r$   r   r   r   r   r   r   r   r   insoutsstate_in_nodeaccum_in_nodestate_out_nodeaccum_out_noder   r   r,   r,   r-   r   |  s   


zAQuantizationTransformPass._insert_quant_moving_average_abs_max_opc                 C   s@  |  sJ |  d|j| || | | d}| |}| tj	kr.d}	n| tj
kr8d}	nd}	zt| j| }
W n   tj| | g|	d}
Y |j| || | | g| d}t||
| j| j |jd||| j|dd	|i||d
d}||| ||| ||| ||fS )zL
        Insert fake_channel_wise_quantize_abs_max op in the graph.
        r   r   r   r   r   r   r   )r   r   rs   r   r   r   r   )r   r$   r   r   r   r   r   r   r   r   r   r"   r   rQ   r   r%   r   r   r.   rR   r   re   r   )rj   r3   r'   r$   r   r   r   r   r   r   r   r   r   r,   r,   r-   r     sT   
z2QuantizationTransformPass._insert_channel_quant_opc           	      C   s   |  sJ |  d|j| | | | | d}d|d > d }|jdt||d||dd|id}|	|| |	|| |	|| |S )	z9
        Insert fake_dequantize_op in the graph.
        r   r   r   r   	max_ranger   r   Scaler   r   )
r   r$   r   _dequantized_var_namer   r   r   r   floatr   )	rj   r3   r'   r   r   r   r   r
  dequant_op_noder,   r,   r-   r     s$   z,QuantizationTransformPass._insert_dequant_opc           
      C   s   |  sJ |  d|j| | | | | d}|jd|||d||dd|id}||| |D ]}	||	| q;||| |S )zK
        Insert fake_channel_wise_dequantize_max_abs in the graph.
        r   r   r   )r   r   r   r   ZScalesr   r   )	r   r$   r   r  r   r   r   r   r   )
rj   r3   r'   Zscale_var_nodesr   r   r   r   r  Zscale_nr,   r,   r-   r   2  s*   
z4QuantizationTransformPass._insert_channel_dequant_opc                 C   s   d}|j D ]}||  }q||  }|jD ]}||  }q|| j v r.| j| }|S | r=| }|| j|< |S ||j	 }|| j|< |S )z
        create a node that same with in_node in graph
        Args:
            graph(IrGraph): create node in graph.
            in_node(IrVarNode): create node that same with in_node.
        Returns:
            created new node
         )
r1   r$   r   rg   rc   Zis_ctrl_varZcreate_control_dep_varr   r   r#   )rj   r3   r6   keyinpnew_noder,   r,   r-   _create_new_nodeP  s    	




z*QuantizationTransformPass._create_new_nodec                 C   s   d}|j D ]}||  }q||  }|jD ]}||  }qd}|| j v r1| j| }d}n||j }|| j|< |rBdS |j D ]}| ||}	|	|	| qE|jD ]}| ||}	|	||	 qW|jD ]}
|
jD ]	}| 
||| qnqidS )aZ  
        copy op_node in source_graph to graph. And will run recursively
        for next ops that link to op_node's outputs.
        Args:
            graph(IrGraph): target graph to copy.
            source_graph(IrGraph): source graph to copy.
            op_node(IrOpNode): op node in source_graph.
        Returns:
            None

        r  FTN)r1   r$   r   rh   rc   create_op_node_from_descr   rN   r  r   _copy_graph)rj   r3   Zsource_graphr4   r  r  Zhas_createdZnew_op_noder6   r  r'   next_op_noder,   r,   r-   r  j  s2   







z%QuantizationTransformPass._copy_graphc              	   C   s  t  }t  }t||U || d < t| d | dd}||}| |j|j< t|}	|j	sJ| j
s>J dd|_| 
 }
|
|	 W d   n1 sTw   Y  W d   n1 scw   Y  t| j | j| W d   n1 s~w   Y  tt|j|j	d}|| |j}|| |j}g }g }| D ]}|jg kr| r|| q| D ]}|jg kr|| q|jD ]	}| ||| q|D ]}|jD ]	}| ||| qq|D ]	}| ||| q|| | }|| | }|| |	j}|j}|D ]
}|||| q|||| |j	s|jd	 }|| | d
 }|jd	 }|| | d
 }|| | d
 }|j}|||| d}|jD ]}| d
 | v ry|}qj|dur|||| n||| |D ]}|||| |r|||| q|jd	 }|jd	 }|jd	 }| | | | | | | | | |jd	  | | | | |S )a  
        Insert a tmp program that returned by func between var_node and op.

        Args:
            graph(IrGraph): target graph to insert tmp program.
            func(Function): function to define a tmp program
            var_node(IrVarNode): node in target graph.
            op(IrOpNode): op in target graph.
        Returns:
            op's new input that replaces var_node
        _Z
_tmp_inputr   )r   r   z3optimizer_func must be set when graph is test graphFN)Zfor_testr   @GRAD)!r	   r   Zswitch_name_generator_guardr$   r
   r   r   r   meanZ	_for_testrZ   Zstop_gradientZminimizer   rQ   r[   runr   r   ZGraphZdescr0   r   r1   r2   r   r   r   r  r   Zupdate_output_linkr   safe_remove_nodes)rj   r3   funcr'   rN   Ztmp_programZstartup_programr6   out_nodeZlossZ	optimizerZ	tmp_graphZin_node_paramsZ
in_op_noder   r4   Ztarget_in_noder   Z	loss_noder   Zop_outZop_out_gradZop_gradZtarget_out_grad_nodeZin_node_gradZin_node_grad_opZop_grad_outZ	mean_gradZmean_out_gradZfill_constant_noder,   r,   r-   r     s   


 



















z&QuantizationTransformPass._insert_funcc                 C   
   | dS zJ
        Return quantized variable name for the input `var_name`.
        
.quantizedr,   rj   r5   r,   r,   r-   r        
z-QuantizationTransformPass._quantized_var_namec                 C   r   zL
        Return dequantized variable name for the input `var_name`.
        .dequantizedr,   r#  r,   r,   r-   r    r$  z/QuantizationTransformPass._dequantized_var_namec                 C   r   zW
        Return the scale name of quantized variable for the input `var_name`.
        @scaler,   r#  r,   r,   r-   r     r$  z/QuantizationTransformPass._quantized_scale_namec                 C   sf   d}|  dr|  drd}| dv rt||rd}|  dr1|  ddkr1d}|S )zA
        Analyse whether the op node skips quantization.
        Fr=   T)r?   matmulr   qat_without_weight)rN   r   r   r$   r7   )rj   r3   r4   is_skipr,   r,   r-   r     s   z(QuantizationTransformPass._is_skip_quant)__name__
__module____qualname____doc__ru   r   r   r   r   r   r   r   r   r   r  r  r   r   r  r   r   r,   r,   r,   r-   r8   Y   sL    
  M'.Wh4&}r8   c                   @   sr   e Zd Z						dddZdd	 Zd
d Zdd Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd ZdS )QuantizationFreezePassFr9   roundr:   Nc	           	      C   s   |dusJ d|dusJ d|| _ || _t|| _|| _|| _|| _|| _t| _	t
| _t | _t | _t | _t | _dS )a  
        The freeze pass is used to adjust the quantize operator order, for example:
            1) `activation -> quant -> dequant -> conv2d` will be frozen into
            `activation -> quant -> conv2d -> dequant`
            2) `weight -> quant -> dequant -> conv2d` will be frozen into `weight -> conv2d`,
            and weight will be scaled offline.

        Args:
            scope(static.Scope): scope is used to get the weight tensor values.
            place(static.CPUPlace|static.CUDAPlace|str): place is used to restore the weight tensors.
                If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
            bias_correction(bool): whether use bias correction for post-training quantization.
                 https://arxiv.org/abs/1810.05723.
            weight_bits(int): quantization bit number for weights.
            activation_bits(int): quantization bit number for activation.
            round_type(str, optional): The method of converting the quantized weights
                value float->int. Currently supports ['round', 'adaround'] methods.
                Default is `round`, which is rounding nearest to the integer.
                'adaround' is refer to https://arxiv.org/abs/2004.10568.
            weight_quantize_type(str): quantization type for weights, support 'abs_max' and
                'channel_wise_abs_max'. The 'range_abs_max' usually is not used for weight,
                since weights are fixed once the model is well trained.
            quantizable_op_type(list[str]): This input param will be removed latter. The pass
                will process all quantized op, so it is not necessary to set the input param.
        Nr   r    )rQ   _bias_correctionr   rR   rS   rT   _round_typer^   _fake_quant_op_list_fake_quant_op_names_fake_dequant_op_list_fake_dequant_op_namesr   r   _op_input_rename_map_op_output_rename_map_quant_var_scale_mapr&   _quantized_ops)	rj   r)   r*   bias_correctionrk   rl   Z
round_typerm   rq   r,   r,   r-   ru   8  s   $



zQuantizationFreezePass.__init__c                 C   s  dd |  D }| }|D ]}| }|| jv r|dd }t|dr2||j v r2|j| }||vrH||j	|
dd }|| j|< q| |
dd }|jdv s[J d|jd	krd|d }|jd
krs| jdkrs|d }n| }|| j|< | jdkr| |}d}	| dr| d}	|| jvr| j| t| ||	| j}
t|
}
| jdu rtj||
||	| jd}
t|
}
| ||
 |  || q| }|D ]}| }|| j!v r|  || q| }|D ]4}| }|dr%|ddkr%| jdkr| tj"v rd
nd}	| #|||	 q| $|| q|D ] }|j%D ]}|j&| j'v rE|}| j'|j& }|(||| q-q(| )| |*  |S )z
        Adjust quantize/dequantize operators order for the inference process.

        Args:
            graph(IrGraph): the applied graph.
        Returns:
            None
        c                 S   rx   r,   ry   rz   r,   r,   r-   rO   v  r|   z0QuantizationFreezePass.apply.<locals>.<listcomp>r   r   r   r   r   r   #the dim of scale_v should be 1 or 2r   r   r:   r1  r   Trk   r   r   rA   )+r   r   r$   r5  inputhasattrr   rc   r0   r   outputr:  	_load_varndimsizer^   tolistr3  rN   r   r   r;  addr   quant_tensorcopyrS   r"   r1  r2  bias_correction_w_restore_var!_remove_fake_quant_and_dequant_opr7  r   _insert_post_channel_dequant_op_insert_post_dequant_opr1   r   r9  r   _remove_unused_var_nodesr   )rj   r3   r   r   r4   Zop_nameZinput_arg_namescale_vparam_vr   quantized_param_vZop_node_descr'   old_innew_inr,   r,   r-   r   l  s   
















zQuantizationFreezePass.applyc                 C   sl   | |j|dd }| |j|dd }|j| jvr%|| j|j< n
| j|j | j|j< || d S )Nr   r   r   )r0   r   rB  r1   r@  r   r8  r  )rj   r3   r4   kvr,   r,   r-   rL    s   z8QuantizationFreezePass._remove_fake_quant_and_dequant_opc                 C   sJ  dd |  D }|jD ]S}| }|| vrq|j| jv r2|}| j|j }|  |||| | |}	| j	|	 }
|	|v rSt
|
tsMJ d|	 dt|
}qt
|
tsZJ | j	|	 }qt| dkrrtd|  d||j| d }|jtd	tjjj|jd g| d
}| tjkrd}n| tjkrd}nd}t ||!|| j"| j# |j$| %| |& | | d
}d}| dv rt|jd  d }|' (dr|' )d}|j*d| j+| j,g|tj-j.j/|d|||gdd|id}|0|| |0|| |0|| |0|| || j1|j< |S )Nc                 S   rx   r,   ry   rz   r,   r,   r-   rO     r|   zJQuantizationFreezePass._insert_post_channel_dequant_op.<locals>.<listcomp>The scale of parameter z is not a list.r    Only support one output, but op  has more than one output.r   channel_scaler   r   r   r   )r)  r   r?   x_num_col_dimsr   )r   r   r   r[  r  r   r   )2r   r1   r$   r   r   r8  clear_outputsr   _original_var_namer:  r!   rb   r"   r   r   r   output_arg_namesr\   r0   r   r   r   r   r   r   r   r   r   r   r   r   r   r.   astyperQ   rR   r   r  r   rN   r   r   r   rS   rT   r   r   r   r   r9  )rj   r3   r4   r   r   r'   r$   rS  rT  original_var_namerP  rZ  r   output_var_nodeweight_scale_noder   r   r[  r  r,   r,   r-   rM    s   





z6QuantizationFreezePass._insert_post_channel_dequant_opc                 C   s  dd |  D }d}d| jd > d }d| jd > d }|jD ]`}| }|| vr-q |j| jv rF|}	| j|j }
|
  |	|	|
| | 
|}| j| }||v rp| |saJ d| d|dkrgdn|}||| 9 }q ||9 }t|ts{J | j| }q t| dkrtd|  d	||j| d
 }|j| | | | | d}|jdt|tjjjd||dd|id}||| ||| ||| || j |j< |S )Nc                 S   rx   r,   ry   rz   r,   r,   r-   rO   :  r|   zBQuantizationFreezePass._insert_post_dequant_op.<locals>.<listcomp>r   rW  z is not a float.g        g:0yE>rX  rY  r   r   r   r	  r  r   r   )!r   rS   rT   r1   r$   r   r   r8  r\  r   r]  r:  	_is_floatr!   r   r   r^  r\   r0   r   r   r  r   r   r   r   r  r   r   r   r   r   r9  )rj   r3   r4   r   r
  Zparam_rangeZ	act_ranger'   r$   rS  rT  r`  rP  r   ra  r   r  r,   r,   r-   rN  9  sd   



	z.QuantizationFreezePass._insert_post_dequant_opc                 C      t | j| S r   r"   r   rQ   r   r%   rj   r$   r,   r,   r-   rC  r     z QuantizationFreezePass._load_varc                 C   "   | j | }||| j d S r   rQ   r   r%   r&   rR   rj   r$   r   r+   r,   r,   r-   rK  u     z#QuantizationFreezePass._restore_varc                    z   t   | }|D ]}|jD ]} | q|jD ]} | qq	dd  D  t t fdd| }|| d S )Nc                 S      h | ]}|j qS r,   r   rM   nr,   r,   r-   	<setcomp>      zBQuantizationFreezePass._remove_unused_var_nodes.<locals>.<setcomp>c                    
   | j  vS r   rn  rn  Zall_used_varsr,   r-   <lambda>     
 zAQuantizationFreezePass._remove_unused_var_nodes.<locals>.<lambda>r&   r   r1   rG  r   filterr   r  rj   r3   r   r4   
input_nodeoutput_nodeZall_unused_varsr,   rt  r-   rO  y      


z/QuantizationFreezePass._remove_unused_var_nodesc                 C   st   | dr|dtd  S | dr|dtd  S | dr*|dtd  S | dr8|dtd  S |S )z4
        Return the original variable name.
        z.quantized.dequantizedNr"  r&  r(  )endswithr   r#  r,   r,   r-   r]    s   



z)QuantizationFreezePass._original_var_namec                 C   r   r%  r,   r#  r,   r,   r-   r    r$  z,QuantizationFreezePass._dequantized_var_namec                 C   s   t |ttjtjtjfS r   )r!   r  r"   r   r   r   )rj   rV  r,   r,   r-   rc    s   z QuantizationFreezePass._is_float)Fr9   r9   r1  r:   N)r,  r-  r.  ru   r   rL  rM  rN  rC  rK  rO  r]  r  rc  r,   r,   r,   r-   r0  7  s$    
4pR9r0  c                   @   s>   e Zd ZdddZdd Zdd Zdd	 Zd
d Zdd ZdS )ConvertToInt8PassNc                 C   s4   |dusJ d|dusJ d|| _ t|| _dS )a6  
        Convert the weights into int8_t type.

        Args:
            scope(static.Scope): scope is used to get the weight tensor values.
            place(static.CPUPlace|static.CUDAPlace|str): place is used to restore the
                8bits weight tensors. If it's string, It can be ``cpu``, and ``gpu:x``,
                where ``x`` is the index of the GPUs.
            quantizable_op_type(list[str]): This input param will be removed latter. The pass
                will process all quantized op, so it is not necessary to set the input param.
        Nr   r    )rQ   r   rR   )rj   r)   r*   rq   r,   r,   r-   ru     s   zConvertToInt8Pass.__init__c           	      C   s   dd |  D }| }i }|D ]7}| drH| ddkrH|jD ]!}| }||v rG||vr>| ||}|||< |||| | q&q| 	| |
  |S )z
        Convert weights' type of the graph. After that, the data type of the
        graph weights is int8_t.

        Args:
            graph(IrGraph): the applied graph.
        Returns:
            None
        c                 S   rx   r,   ry   rz   r,   r,   r-   rO     r|   z+ConvertToInt8Pass.apply.<locals>.<listcomp>r   r   )r   r   rN   r   r   r1   r$   _convert_to_int8r   rO  r   )	rj   r3   r   r   Z	input_mapr4   r'   r$   int8_var_noder,   r,   r-   r     s,   



zConvertToInt8Pass.applyc                 C   sZ   |  d }|j|| | tjjjd}| |  }| j	
| | ||tj |S )Nz.int8r   )r$   r   r   r   r   r   r   ZINT8rC  rQ   r#   
_store_varr"   int8)rj   r3   r'   Zint8_var_node_namer  r   r,   r,   r-   r    s   z"ConvertToInt8Pass._convert_to_int8c                 C   rd  r   re  rf  r,   r,   r-   rC    rg  zConvertToInt8Pass._load_varc                 C   s(   | j | }|||| j d S r   )rQ   r   r%   r&   r_  rR   )rj   r$   r   r   r+   r,   r,   r-   r    s   zConvertToInt8Pass._store_varc                    rl  )Nc                 S   rm  r,   rn  ro  r,   r,   r-   rq    rr  z=ConvertToInt8Pass._remove_unused_var_nodes.<locals>.<setcomp>c                    rs  r   rn  rn  rt  r,   r-   ru    rv  z<ConvertToInt8Pass._remove_unused_var_nodes.<locals>.<lambda>rw  ry  r,   rt  r-   rO    r|  z*ConvertToInt8Pass._remove_unused_var_nodesr   )	r,  r-  r.  ru   r   r  rC  r  rO  r,   r,   r,   r-   r~    s    
#r~  c                   @   s   e Zd Zdd Zdd ZdS )TransformForMobilePassc                 C   s   t | _t| _dS )z\
        This pass is used to convert the frozen graph for paddle-mobile execution.
        N)r4  r5  r6  r7  r   r,   r,   r-   ru     s   
zTransformForMobilePass.__init__c           	      C   s   |  }|D ]b}| }|| jv r:|d || }|jD ]}||| q |jD ]}||| q,|	| || j
v rh|d || }|jD ]}||| qN|jD ]}||| qZ|	| q|  |S )a+  
        Because paddle-mobile use `quantize` an `dequantize` as the names of
        quantize operator and dequantize operator, the `apply` function just
        realize this logic.

        Args:
            graph(IrGraph): the graph will be transformed.
        Returns:
            None
        quantizeZ
dequantize)r   r$   r5  set_typer  rN   r1   r   r   r  r7  r   )	rj   r3   r   r4   r$   Z
quant_noderz  r{  Zdequant_noder,   r,   r-   r     s,   









zTransformForMobilePass.applyN)r,  r-  r.  ru   r   r,   r,   r,   r-   r     s    r  c                   @   s0   e Zd Z					d	ddZdd Zdd ZdS )
OutScaleForTrainingPassNr<   c                 C   s4   || _ t|| _|| _|| _tt | _|| _	dS )aS  
        This pass is used for calculating output scales of some operators.
        These output scales may be used by tensorRT or some other inference engines.

        Args:
            scope(static.Scope): The scope is used to initialize these new parameters.
            place(static.CPUPlace|static.CUDAPlace|str): The place is used to initialize new parameters.
                If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the
                index of the GPUs.
            moving_rate(float): The decay coefficient of moving average. The default value is 0.9.
        N)
rQ   r   rR   r`   re   rb   r   rc   _teller_set_scale_dict)rj   r)   r*   ro   rs   
scale_dictr,   r,   r-   ru   +  s   

z OutScaleForTrainingPass.__init__c                 C   sT  t |ts	J d| jdu r| | _g }| D ]}| | jv r'|| qtt	|dddm}|D ]`}t
|D ]S}||j|}| tjjjtjjjtjjjfvs_d| v r`q=| tjkrjd}n| tjkrtd}nd	}z|| | |  W q=   |j| | tjjjd
g| d}| jdurzt| j|  g}	W n   tjd
g|d}	Y ntjd
g|d}	Y t ||	| j!| j" d|i}
d|i}| js;|jt#$dtjjj| d
gd}t |tjd
g|d| j!| j" |jt#$dtjjj| d
gd}t |tjd
g|d| j!| j" |%|& }|%|& }||
d< ||
d< ||d< ||d< | j'| j|( )dd}|j*d||
|d}d}t	|jdkr`|jd }|+|| |+|| |ru|+|| | js|+|| |+|| |+|| |+|| q=|,  q5W d   |S 1 sw   Y  |S )z
        Insert the `moving_average_abs_max_scale` op in order to calculate output scales
        of operators in the teller_set.

        Args:
            graph(IrGraph): the target graph.
        rw   Nz.Adding OutScale op:|{bar}| {n_fmt}/{total_fmt}r   r   r  r   r   r   r   r   r   r   r   zscale_state@r   zscale_accum@r   r   r   r   r   )ro   rs   r   moving_average_abs_max_scaler   r   )-r!   r   re   rs   r   r$   r  r   r   r   r   _get_op_output_var_namesr0   r   r   r   r   r   FP64FP32FP16r   r   r   r   _scale_namer   r   r  r"   r   r  r.   rQ   rR   r   r   r   r#   r`   rN   r   r   r   r   )rj   r3   Z
target_opsrN   r   Zoutput_var_namer6   r   
scale_noder   r  r  r  r  r  r  r   Zscale_op_noder  r,   r,   r-   r   E  s   







xxzOutScaleForTrainingPass.applyc                 C   r   E
        Return the scale name for the var named `var_name`.
        r(  r,   r#  r,   r,   r-   r    r$  z#OutScaleForTrainingPass._scale_name)NNr<   NNr,  r-  r.  ru   r   r  r,   r,   r,   r-   r  *  s    
 r  c                   @   s&   e Zd ZdddZdd Zdd ZdS )	OutScaleForInferencePassNc                 C   s   || _ tt | _dS )a  
        This pass is used for setting output scales of some operators.
        These output scales may be used by tensorRT or some other inference engines.

        Args:
            scope(static.Scope): The scope is used to initialize these new parameters.
        N)rQ   rb   r   rc   r  )rj   r)   r,   r,   r-   ru     s   z!OutScaleForInferencePass.__init__c                 C   s@  t |ts	J d| }|D ]}| | jv rt|}|D ]y}||j|}|j	
 du sA| tjjjtjjjtjjjfvrBq| |}| j|}|dusYJ d| dt| d }	| dt|	 t||}
|
dus|J |d | |
d t|
d  d	 t|	 | d
d qq|  |S )z
        Get output scales from the scope and set these scales in op_descs
        of operators in the teller_set.

        Args:
            graph(IrGraph): the target graph.
        rw   NzCan not find z variable in the scoper   Zout_thresholdz is not the output of the opr   Z
_thresholdr   T)r!   r   r   r$   r  r   r  r0   r   r   r#   r   r   r   r   r  r  r  r  rQ   r   r"   r   r%   rN   r   r  Z_get_output_name_indexr   r   )rj   r3   Zop_nodesr4   	var_namesr5   r6   r   Z	scale_varr   Zargname_indexr,   r,   r-   r     sP   




zOutScaleForInferencePass.applyc                 C   r   r  r,   r#  r,   r,   r-   r    r$  z$OutScaleForInferencePass._scale_namer   r  r,   r,   r,   r-   r    s    
5r  c                   @   sH   e Zd ZdZg dZdddddgddgddfd	d
Zdd Zdd ZdS )AddQuantDequantPasszq
    Quantize the ops that do not have weights, and add quant_dequant op for the
    quantized ops's inputs.
    ZreluZrelu6Z
leaky_relutanhZswishNr<   r9   r=   elementwise_addpool2dc	           
      C   s   || _ t|| _|| _|| _|| _|| _|| _|| _| jD ]}	|	t	t
 v s-J |	d qdd | jD | _| j dus@J d| jdusIJ ddS )a?  
        Constructor.

        Args:
            scope(static.Scope): The scope is used to initialize these new parameters.
            place(static.CPUPlace|static.CUDAPlace|str): place is used to initialize new
                parameters described above. If ``place`` is string, it can be It can be ``cpu``
                or ``gpu:x``, where ``x`` is the index of the GPUs.
            moving_rate(float, optional): the param for 'quant_dequant_moving_average_abs_max'
                quantization. Default is 0.9.
            quant_bits(int, optional): quantization bit number for activation. Default is 8.
            skip_pattern(str, optional): The user-defined quantization skip pattern, which
                will be presented in the name scope of an op. When the skip pattern is
                detected in an op's name scope, the corresponding op will not be quantized.
                Default is 'skip_quant'.
            quantizable_op_type(list[str], optional): List the type of ops that will be
                quantized. Default is ["elementwise_add", "pool2d"].
        rI   c                 S   rJ   rK   r,   rL   r,   r,   r-   rO   R  rP   z0AddQuantDequantPass.__init__.<locals>.<listcomp>Nscope must not be None.place must not be None.)rQ   r   rR   r`   _quant_bitsre   rU   r  _quantizable_op_typerb   r   rc   _quantizable_grad_op_type
rj   r)   r*   ro   r   rp   rq   rs   r  r   r,   r,   r-   ru   (  s"   

zAddQuantDequantPass.__init__c                    sL  t |ts	J d| jdu r| | _t }| }tt|ddd}|D ]  	 | j
v rd}t | jtrL  doJt fdd	| jD }nt | jtrf  doe  d| jd
k}  dou  ddk}|s|st| sq'  dd   d| j   dd t }d}|D ]
}	d|	v rd} nq|rq'|D ]-}	| j|	}
|	|v r||	 }n| ||
| j  d\}}|||	< ||
|  q|  q'W d   n1 sw   Y  |D ])  	 | jv r  D ]}||v r| j|}
|| }||
|  qq|  |S )
        Add quant_dequant before some ops, such as the 'elementwise_add' and
        'pool2d' op.

        Args:
            graph(IrGraph): the target graph.
        Returns:
            None
        rw   N6Adding quant activation op:|{bar}| {n_fmt}/{total_fmt}r   r   Fr}   c                 3   r~   r   r   r   r   r,   r-   r   u  r   z,AddQuantDequantPass.apply.<locals>.<genexpr>r   r   r   r*  rl   r   Tquantized.dequantizedr   ) r!   r   re   rs   r   r   r   r   r   r$   r  rU   rb   rN   r   r   r   r   r   r7   r   r  r   r/   r0   r1   /_insert_quant_dequant_moving_average_abs_max_opr   r   r  r   r   )rj   r3   dequantized_vars_mapr   r   r+  is_quantized	arg_namesr=   arg_namer6   r   r  
input_namer   r,   r   r-   r   Y  s   






G
zAddQuantDequantPass.applyc                 C   s  |j |  d| | | d}|  d}| tjkr%d}n| tjkr/d}nd}z+| jdurN| | j	 v rNt
j| j|  g|d}nt
j| j| |d}W n   t
jtg|d}Y |j|  dtjjjd	g| d}	t|	|| j| j ||	 }
||	d
}||
d}| js|jtdtjjj| d	gd}| tjkrd}n| tjkrd}nd}t|t
jd	g|d| j| j |jtdtjjj| d	gd}t|t
jd	g|d| j| j || }|| }||d< ||d< ||d< ||d< || j| j|d}|jd|||d}||| ||	| ||| |||
 | jsW||| ||| ||| ||| ||
fS )z:Insert fake_quantize_dequantize_moving_average_abs_max op.z.quant_dequantr   z.quant_dequant@scaler   r   r   Nr   r   r   r   zquant_dequant.stater   zquant_dequant.accumr   r   r   r   r  r   r   ) r   r$   r   r   r   r   r   r   r  rc   r"   r   rQ   r   r%   r   r   r   r   r   r   r.   rR   r   r#   re   r   r   r  r`   r   r   )rj   r3   r'   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r   r   r,   r,   r-   r    s   


zCAddQuantDequantPass._insert_quant_dequant_moving_average_abs_max_op)r,  r-  r.  r/  _activation_typeru   r   r  r,   r,   r,   r-   r    s    
1ir  c                   @   sd   e Zd ZdZ						ddd	Zddejjjfd
dZ	dd Z
dd Zdd Zdd Zdd ZdS )InsertQuantizeLineara  
    Insert quantize_linear and dequantize_linear op before ops.

    Args:
        place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
            If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
        scope(paddle.Scope): scope is used to get the weight tensor values.
        quant_bits(int, optional): quantization bit number for weight. Default is 8.
        quant_axis(int, optional): quantization dimension of channels. When it is greater than or
            equal to 0, it will quantization with per channel, else quantization with per layer.
            Default is -1.
        channel_wise(bool, optional): Whether quantization with per channel or not. Default is False.
        moving_rate(float): the rate for 'moving average' method.
        is_test(bool, optional): Whether quantization with training or not. Default is True.
        scale_dict(dict, optional): calibration ranges of tensors output.
    r9   r   Fr<   TNc	           	      C   s4   || _ || _|| _|| _|| _|| _|| _|| _d S r   )rR   rQ   r   r   channel_wisere   r`   r  )	rj   r*   r)   r   r   r  ro   rs   r  r,   r,   r-   ru   G  s   
zInsertQuantizeLinear.__init__c                 C   s  |  sJ |  d|s| n|}|j| || | | d}|s| tjkr3d}n| tj	kr=d}nd}| 
|}| jr]| | j }	tjjj}
tj|	|dt }nd}	| }
tjtg|d}| jd ur| | j v rtj| j|  g|d}|j||
|	g| d}t||| j| j d }|d u r|j| | tjjj| tjjjd}t|tj| dd| j| j ||d	}|d ur||d
< | j| jd}||d< d|i}| jst| |! }|jt"#dtjjj| dgd}| tjkrd}n| tj	krd}nd}t|tjdg|d| j| j |jt"#dtjjj| dgd}t|tjdg|d| j| j | |! }| |! }||d< ||d< ||d< ||d< ||d< | j|d< | j$|d< |j%d|||d}|&|| |&|| |d ur|&|| |&|| | js|&|| |&|| |&|| |&|| |&|| ||fS )Nr   r   r   r   r   r   r   int32r  	ZeroPointr   r   r   Yr   r   r   r   r   r   r   r   rs   ro   quantize_linearr   )'r   r$   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r"   r  r   r   r  rc   r   r.   rQ   rR   _zero_point_nameINT32r   r   re   r   r#   r   r   r`   r   r   )rj   r3   r'   r5   r   r   r   r   r   Zscale_var_shapeZscale_var_typeZinit_scale_valuezero_point_noder1   r   r   r   r  r  r  r  r   r,   r,   r-   insert_quant_op[  s   






z$InsertQuantizeLinear.insert_quant_opc           
      C   s  |  sJ |  d|j| | | | | d}d }|d u rM|j| | t	j
jj| t	j
jjd}t|tj| dd| j| j ||d}|d urZ||d< | j| jd}||d< |jd	||d
|id}	|||	 |||	 |d ur|||	 ||	| |S )Nr   r   r  r   r  r  r  r   dequantize_linearr  r   )r   r$   r   r  r   r   r   r   r  r   r   r   r   r  r.   r"   r   rQ   rR   r   r   r   r   )
rj   r3   r'   r   r   r   r  r1   r   r   r,   r,   r-   insert_dequant_op  sL   
z&InsertQuantizeLinear.insert_dequant_opc                 C   r   r!  r,   r#  r,   r,   r-   r   	  r$  z(InsertQuantizeLinear._quantized_var_namec                 C   r   r%  r,   r#  r,   r,   r-   r  !	  r$  z*InsertQuantizeLinear._dequantized_var_namec                 C   r   r'  r,   r#  r,   r,   r-   r   '	  r$  z*InsertQuantizeLinear._quantized_scale_namec                 C   r   r  @zero_pointr,   r#  r,   r,   r-   r  -	  r$  z%InsertQuantizeLinear._zero_point_name)r9   r   Fr<   TN)r,  r-  r.  r/  ru   r   r   r   r   r  r  r   r  r   r  r,   r,   r,   r-   r  5  s&    

 .r  c                   @   sr   e Zd ZdZdddddddddgg ddddddddfd	d
Zdd Zdd Zdd Zdd Zdd Z	dd Z
dS )QuantizationTransformPassV2z
    Quantize the ops that have weights. Add quant and dequant ops for
    the quantized ops's inputs. It is used in the new format of quantization.
    Nr9   r:   r;   r<   r=   r>   c                 C   s  || _ t|| _|| _|| _|	| _|| _|| _|| _|| _	|| _
|| _d| _g d}|dks2J d||vr>td| d||vrJtd| d|| _|| _|| _|| _|
| _| jD ]}|tt v slJ |d	 q\d
d | jD | _|| _d| _i | _i | _dS )a   
        Args:
            scope(paddle.Scope): When activation use 'range_abs_max' as the quantize
                type, this pass will create some new parameters. The scope is used to
                initialize these new parameters.
            place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to initialize new
                parameters described above. If it's string, It can be ``cpu``, and ``gpu:x``,
                where ``x`` is the index of the GPUs.
            weight_bits(int): quantization bit number for weights,
                the bias is not quantized.
            activation_bits(int): quantization bit number for activation.
            activation_quantize_type(str): quantization type for activation,
                now support 'abs_max', 'range_abs_max' and 'moving_average_abs_max'.
                If use 'abs_max' mode, the quantization scale will be calculated
                dynamically each step in both training and testing period. If use
                'range_abs_max', a static quantization scale will be calculated
                during training and used in inference.
            weight_quantize_type(str): quantization type for weights,
                support 'abs_max' and 'channel_wise_abs_max'. The 'range_abs_max'
                usually is not used for weight, since weights are fixed once the
                model is well trained.
            window_size(int): the window size for 'range_abs_max' quantization.
            moving_rate(float): the param for 'moving_average_abs_max' quantization.
            skip_pattern(str or str list): The user-defined quantization skip pattern, which
                will be presented in the name scope of an op. When the skip pattern is
                detected in an op's name scope, the corresponding op will not be quantized.
            quantizable_op_type(list[str]): List the type of ops that will be quantized.
                Default is ["conv2d", "depthwise_conv2d", "mul"]. The quantizable_op_type in
                QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
            weight_quantize_func(function): Function that defines how to quantize weight.
                Using this can quickly test if user's quantization method works or not.
                In this function, user should both define quantization function and
                dequantization function, that is, the function's input is non-quantized
                weight and function returns dequantized weight. If None, will use
                quantization op defined by 'weight_quantize_type'. Default is None.
            act_quantize_func(function): Function that defines how to quantize activation.
                Using this can quickly test if user's quantization method works or not.
                In this function, user should both define quantization and dequantization
                process, that is, the function's input is non-quantized activation and
                function returns dequantized activation. If None, will use quantization
                op defined by 'activation_quantize_type'. Default is None.
            weight_preprocess_func(function): Function that defines how to preprocess
                weight before quantization. Using this can quickly test if user's preprocess
                method works or not. The function's input is non-quantized weight and
                function returns processed weight to be quantized. If None, the weight will
                be quantized directly. Default is None.
            act_preprocess_func(function): Function that defines how to preprocess
                activation before quantization. Using this can quickly test if user's
                preprocess method works or not. The function's input is non-quantized
                activation and function returns processed activation to be quantized.
                If None, the activation will be quantized directly. Default is None.
            optimizer_func(function): Function return a optimizer. When 'is_test' is
                False and user want to use self-defined quantization function and
                preprocess function, this function must be set. Default is None.
            executor(paddle.Executor): If user want to use self-defined quantization
                function and preprocess function, executor must be set for initialization.
                Default is None.

        Examples:
            .. code-block:: python

                >>> # The original graph will be rewrite.
                >>> import paddle
                >>> import paddle.static as static
                >>> from paddle.static.quantization import QuantizationTransformPassV2
                >>> from paddle.base.framework import IrGraph
                >>> from paddle.framework import core

                >>> graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
                >>> place = paddle.CPUPlace()
                >>> scope = paddle.static.global_scope()
                >>> transform_pass = QuantizationTransformPassV2(scope, place)
                >>> transform_pass.apply(graph)
        Fr@   rA   rD   rE   rF   rG   rH   rI   c                 S   rJ   rK   r,   rL   r,   r,   r-   rO   	  rP   z8QuantizationTransformPassV2.__init__.<locals>.<listcomp>N)rQ   r   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   _conv1dtranspose_flagr\   r]   r^   r_   r`   ra   rb   r   rc   rd   re   rf   rg   rh   ri   r,   r,   r-   ru   :	  sP   ^





z$QuantizationTransformPassV2.__init__c                    s   d}t | jtr  dot fdd| jD }nt | jtr6  do5  d| jdk}|rJ  	dd   	dd d S d S )	NFr}   c                 3   r~   r   r   r   r   r,   r-   r   	  r   z@QuantizationTransformPassV2._quant_preprocess.<locals>.<genexpr>r   r=   Tr   r   )rj   r4   r   r,   r   r-   r   	  s   
z-QuantizationTransformPassV2._quant_preprocessc              
   C   s8  |  dd |  d}d }|j}|D ]}| | vr"q| | jv r1| j|  }n| }|| jv r;q| | jv sI| | j	v rKdnd}	|	r^| j
d ur^| || j
||}n|	sn| jd urn| || j||}|	r| jd ur| || j||}
| j| q|	s| jd ur| || j||}
| j| q| | jv r| jn| j}|	r| jn| j}d}d}|dkrd}| }|dko|  d	}|r|d
 n|}| jrd}d| _n| tjv rdnd}t| j| j|||| j| jd}|j||||d\}}|||||}|| j|< |	r|}|||| q|S )Nr   r   r   TFr   rA   r   r   r   r   r   r   r   r  ro   rs   r5   r   )rN   r   r   r1   r$   r   r   r   r   persistable_cast_output_varsrX   r   rY   rV   r   rW   rS   rT   r^   r]   r  r   r   r  rR   rQ   r`   re   r  r  r   )rj   r3   rN   r   rb  r1   r'   r   r$   r   r   r   rt   r   r  r   r   insert_quant_passr   r   r,   r,   r-   r   	  s   






z.QuantizationTransformPassV2._transform_forwardc                 C   sL   |j D ] }| | vrq| | jv r#| j|  }|||| qd S r   )r1   r$   r   r   r   )rj   r3   rN   r'   r   r,   r,   r-   r   H
  s   
z/QuantizationTransformPassV2._transform_backwardc                 C   sF   d}|j D ]}| | vrq| | jv s| | jv r d}q|S r   )r1   r$   r   r   r  )rj   rN   r   r'   r,   r,   r-   r   P
  s   
z'QuantizationTransformPassV2._has_weightc              
   C   s  d|  vsd|dd vrd S |dd }d }| D ])}t|}||v rG| |rG|  dkrA| ||s@| ||}q| ||}q|jD ]}|  |krV| j	n| j
}|  |krb| jn| j}	d}
d}|	dkrd	}|  tjv rxd
nd}
dtjv rtjd | ||r d S t| j| j||
|| j| jd}|  |kr|nd }|j|||  || dd\}}||||| d}|||| qKd S )Nr   Z
unsqueeze2Filterr   r   r   FrA   Tr   r  r   )r5   r   r   )r$   r@  r   r   r  r   r   r   r1   rS   rT   r^   r]   r   remover  rR   rQ   r`   re   r  rN   r   r  r   )rj   r3   rN   Zconv_weight_var_namerb  _opr  r'   r   rt   r   r  r  r   r   r   r,   r,   r-   _quant_conv1d\
  s   


z)QuantizationTransformPassV2._quant_conv1dc                 C   s  t |ts	J d| jdu r| | _t | _g | _g | _dd |	 D | _|
 }g | _|
 D ]}| dkrP|jd  | jv rP| j|jd   q3|D ]}| | jv sc| | jv rh| | qSi |_tt|ddd	0}|D ]%}| | jv r| ||s| |r| || n| || |  qxW d   n1 sw   Y  |D ]}| | jv r| |r| || q|S )
rv   rw   Nc                 S   rx   r,   ry   rz   r,   r,   r-   rO   
      z5QuantizationTransformPassV2.apply.<locals>.<listcomp>castr   r   r   r   )r!   r   re   rs   r   r   r   r   r   r   r   r  r$   r1   r   r   ra   rd   r   r   r   r   r   r   r   r  r   r   )rj   r3   r   rN   r   r,   r,   r-   r   
  sZ   




z!QuantizationTransformPassV2.apply)r,  r-  r.  r/  ru   r   r   r   r   r  r   r,   r,   r,   r-   r  4	  s6    
 jIr  c                   @   s@   e Zd ZdZg dZdddddgddgddfd	d
Zdd ZdS )AddQuantDequantPassV2z
    Quantize the ops that do not have weights, and add quant_linear and dequant_linear
    op for the quantized ops's inputs. It is used in the new format of quantization.
    r  Nr<   r9   r=   r  r  c	           
      C   s   || _ t|| _|| _|| _|| _|| _|| _|| _| jD ]}	|	t	t
 v s-J |	d qdd | jD | _| j dus@J d| jdusIJ dg | _dS )aJ  
        Args:
            scope(paddle.Scope): The scope is used to initialize these new parameters.
            place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to initialize new
                parameters described above. If ``place`` is string, it can be It can be ``cpu``
                or ``gpu:x``, where ``x`` is the index of the GPUs.
            moving_rate(float, optional): the param for 'quant_dequant_moving_average_abs_max'
                quantization. Default is 0.9.
            quant_bits(int, optional): quantization bit number for activation. Default is 8.
            skip_pattern(str, optional): The user-defined quantization skip pattern, which
                will be presented in the name scope of an op. When the skip pattern is
                detected in an op's name scope, the corresponding op will not be quantized.
                Default is 'skip_quant'.
            quantizable_op_type(list[str], optional): List the type of ops that will be
                quantized. Default is ["elementwise_add", "pool2d"].
            scale_dict(dict, optional): calibration ranges of tensors output.

        Examples:
            .. code-block:: python

                >>> # The original graph will be rewrite.
                >>> import paddle
                >>> import paddle.static as static
                >>> from paddle.static.quantization import AddQuantDequantPassV2
                >>> from paddle.base.framework import IrGraph
                >>> from paddle.framework import core

                >>> graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
                >>> place = paddle.CPUPlace()
                >>> scope = paddle.static.global_scope()
                >>> add_quant_dequant_pass = AddQuantDequantPassV2(scope, place)
                >>> add_quant_dequant_pass.apply(graph)
        rI   c                 S   rJ   rK   r,   rL   r,   r,   r-   rO   -  rP   z2AddQuantDequantPassV2.__init__.<locals>.<listcomp>Nr  r  )rQ   r   rR   r`   r  re   rU   r  r  rb   r   rc   r  r   r  r,   r,   r-   ru   
  s$   ,


zAddQuantDequantPassV2.__init__c                    s  t |ts	J d| jdu r| | _t }dd | D | _| }t	t
|ddd}|D ]܉   | jv r	d}t | jtrW  d	oUt fd
d| jD }nt | jtrq  d	op  d	| jdk}  do  ddk}|s|rq1t }d}|D ]
}	d|	v rd} nq|rq1|D ]i}	| j|	}
|
 rq|
 tjtjtjfvrt d   d  nC|	|v r||	 }n2t!| j"| j#| j$dd| j%| j| j&d}|j'||
  dd\}}|(|||  d}|||	< |)|
|  q|*  q1W d   n	1 sw   Y  |D ]*   | j+v rI , D ]}||v rG| j|}
|| }|)|
|  q.q |S )r  rw   Nc                 S   rx   r,   ry   rz   r,   r,   r-   rO   F  r  z/AddQuantDequantPassV2.apply.<locals>.<listcomp>r  r   r   Fr}   c                 3   r~   r   r   r   r   r,   r-   r   U  r   z.AddQuantDequantPassV2.apply.<locals>.<genexpr>r   r   r   r  Tz
Since the zJ contains an input of type INT, the quantization of this layer is skipped.)r   r   r  ro   rs   r  r   )r   )-r!   r   re   rs   r   r   r   r   r   r   r   r$   r  rU   rb   rN   r   r   r   r   r   r   r/   r0   r1   r2   r   r   r   r   r   _loggerwarningr  rR   rQ   r  r`   r  r  r  r   r   r  r   )rj   r3   r  r   r   r+  r  r  r=   r  r6   r   r  r   r   r  r,   r   r-   r   5  s   






\
zAddQuantDequantPassV2.apply)r,  r-  r.  r/  r  ru   r   r,   r,   r,   r-   r  
  s    
Ar  c                   @   s:   e Zd ZdZdddZdd Zdd Zd	d
 Zdd ZdS )ReplaceFakeQuantDequantPasszS
    replace quant-dequant ops with quantize_linear and dequantize_linear ops.
    r9   c                 C   s>   t || _|| _|| _| jdusJ d| jdusJ ddS )a  
        Args:
            scope(paddle.Scope): The scope is used to initialize these new parameters.
            place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to initialize new
                parameters described above. If ``place`` is string, it can be It can be ``cpu``
                or ``gpu:x``, where ``x`` is the index of the GPUs.
            quant_bits(int, optional): quantization bit number for activation. Default is 8.

        Examples:
            .. code-block:: python

                >>> # The original graph will be rewrite.
                >>> import paddle
                >>> import paddle.static as static
                >>> from paddle.static.quantization import ReplaceFakeQuantDequantPass
                >>> from paddle.base.framework import IrGraph
                >>> from paddle.framework import core

                >>> graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
                >>> place = paddle.CPUPlace()
                >>> scope = paddle.static.global_scope()
                >>> replace_pass = ReplaceFakeQuantDequantPass(scope, place)
                >>> replace_pass.apply(graph)
        Nr  r  )r   rR   rQ   r  )rj   r)   r*   r   r,   r,   r-   ru     s
   
z$ReplaceFakeQuantDequantPass.__init__c                 C   s  t |ts	J dg }g }g }| D ]}| dkr%||dd  q| D ]$}| tv s8| dkrN|dd }||v rI|| q*|| q*|D ]'}||j	|dd }||j
|dd }	|	j
D ]	}
||	||
 qnqQ|D ]}| || || q{|  |S )Nrw   r  r   r   r   )r!   r   r   r$   r   rB  _fake_quant_dequant_op_listr@  r0   r1   r   r   _replace_opr  r   )rj   r3   Zfake_quant_dequant_opsZremove_fake_quant_opsZobserver_out_node_namesrN   r5   r  x_noder  r  r,   r,   r-   r     s>   

z!ReplaceFakeQuantDequantPass.applyc                 C   s  | |j|dd }| |j|dd }| |j|dd }| dr2| dnd}| drB| dn| j}d|d > d }d| d }	d }
|}|
d u r|j	| 
| tjjj| tjjjd	}
t|
tj| d
d| j| j |j| | | | | d	}|jd|||	|d|||
dd|id}||| ||| |
d ur||
| ||| |jd|||	|d|||
dd|id}||| ||| |
d ur||
| ||| d S )Nr   r   r   r   r   r   r   r   r   r  r   r  )r   r   qminqmax)r   r  r  r  r   r  )r0   r1   r@  r   rB  rN   r   r   r  r   r  r$   r   r   r   r   r   r  r.   r"   r   rQ   rR   r   r   r   r   r   r   )rj   r3   rN   r  r  r  r   r   r  r  r  Zquanted_noder   r   r  r,   r,   r-   r    s   z'ReplaceFakeQuantDequantPass._replace_opc                 C   r   r!  r,   r#  r,   r,   r-   r   R  r$  z/ReplaceFakeQuantDequantPass._quantized_var_namec                 C   r   r  r,   r#  r,   r,   r-   r  X  r$  z,ReplaceFakeQuantDequantPass._zero_point_nameN)r9   )	r,  r-  r.  r/  ru   r   r  r   r  r,   r,   r,   r-   r    s    
&Qr  c                   @   s@   e Zd ZdZ			dddZdd Zd	d
 Zdd Zdd ZdS )QuantWeightPassa  
    quant weights and remove weights input quantize_linear node. for example:
    `weight -> quant -> dequant -> conv2d` will be frozen into `weight -> dequant -> conv2d`,
    and weight will be scaled offline.

    Args:
        scope(paddle.Scope): scope is used to get the weight tensor values.
        place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
            If it's string, It can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
        bias_correction(bool): whether use bias correction for post-training quantization.
             https://arxiv.org/abs/1810.05723.
        quant_bits(int, optional): quantization bit number for weight. Default is 8.
        save_int_weight(bool, optional): Whether the type saving the weight is int. Default is True.

    Examples:
        .. code-block:: python

            >>> # The original graph will be rewrite.
            >>> import paddle
            >>> import paddle.static as static
            >>> from paddle.static.quantization import QuantWeightPass
            >>> from paddle.base.framework import IrGraph
            >>> from paddle.framework import core

            >>> graph = IrGraph(core.Graph(paddle.static.Program().desc), for_test=False)
            >>> place = paddle.CPUPlace()
            >>> scope = paddle.static.global_scope()
            >>> quant_weight_pass = QuantWeightPass(scope, place)
            >>> quant_weight_pass.apply(graph)
    Fr9   Tc                 C   sR   t || _|| _|| _|| _|| _| jd usJ d| jd us#J dt | _d S )Nr  r  )r   rR   rQ   r2  r  _save_int_weightr&   r;  )rj   r)   r*   r<  r   Zsave_int_weightr,   r,   r-   ru     s   
zQuantWeightPass.__init__c                 C   s  t |ts	J dg }dd | D }|D ]}||j|dd }| r||j|dd }||j|dd }||j|dd }| 	|
 }	|	jd	v s\J d
|	jdkre|	d }	|	jdkru|
 dkru|	d }	n|	 }	| 	|
 }
| d}| d}|
 | jvr| j|
  tj|
 |	||dd}| jdu rtj|
||	||d}| jr| jdkrtj}||}| |
 | |jD ]	}|||| q|| q| | d S )Nrw   c                 S   s   g | ]
}|  d kr|qS )r  ry   rL   r,   r,   r-   rO     s    z)QuantWeightPass.apply.<locals>.<listcomp>r   r   r  r  r  r=  r>  r   r   r:   r   r   T)Zonnx_formatr?  r9   ) r!   r   r   r0   r1   r@  r2   r   rB  rC  r$   rD  rE  rF  rN   r   r;  rG  r   rH  rI  r2  rJ  r  r  r"   r  r_  rK  r   r  rO  )rj   r3   Zfake_quant_ops_for_weightZfake_quant_opsr  r  r  r  r  rP  rQ  r   Zbits_lengthrR  Zsave_weight_dtyper  r,   r,   r-   r     sx   





zQuantWeightPass.applyc                    rl  )Nc                 S   rm  r,   rn  ro  r,   r,   r-   rq    rr  z;QuantWeightPass._remove_unused_var_nodes.<locals>.<setcomp>c                    rs  r   rn  rn  rt  r,   r-   ru    rv  z:QuantWeightPass._remove_unused_var_nodes.<locals>.<lambda>rw  ry  r,   rt  r-   rO    r|  z(QuantWeightPass._remove_unused_var_nodesc                 C   rd  r   re  rf  r,   r,   r-   rC    rg  zQuantWeightPass._load_varc                 C   rh  r   ri  rj  r,   r,   r-   rK    rk  zQuantWeightPass._restore_varN)Fr9   T)	r,  r-  r.  r/  ru   r   rO  rC  rK  r,   r,   r,   r-   r  _  s    #
Cr  c                   @   s:   e Zd ZdZdg ddfddZdd Zd	d
 Zdd ZdS )AddQuantDequantForInferencePassz
    When export quant model, it will traverse to find the output of each op, and then insert the quant/dequant op after it.
    r9   NTc                 C   s8   || _ || _|| _|| _|r|ntt | _|| _dS )a  
        Args:
            scope(static.Scope): The scope is used to initialize these new parameters.
            place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
                If it's string, it can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
            quant_bits(int, optional): quantization bit number for weight. Default is 8.
        N)	rQ   rR   r  _only_observerrb   r   rc   r  _calibration_range_dict)rj   r)   r*   r   rq   Zcalibration_range_dictonly_observerr,   r,   r-   ru     s   

z(AddQuantDequantForInferencePass.__init__c           
      C   s  t |ts	J di }t }| D ]>}| | jv rQt|}|D ]-}|	|j
|}| tjtjtjfvr9q#||v rB|| }n
| ||}|||< |||< q#q| D ]-}| dkrd|| qVt|}|D ]}||r|	|j|}	||	|| | qkqV|S )E
        Args:
            graph(IrGraph): the target graph.
        rw   r  )r!   r   r   r   r   r$   r  r   r  r0   r   r   r   r   r   r   _insert_quant_dequant_opr  r/   getr1   r   )
rj   r3   Zdequant_node_mapr  r4   r  r5   r  r   r6   r,   r,   r-   r     sR   





	z%AddQuantDequantForInferencePass.applyc                 C   r   r  r,   r#  r,   r,   r-   r  >  r$  z+AddQuantDequantForInferencePass._scale_namec              	   C   s  |  sJ |  d| }d}|j| d| | | d}z|| | |}W nH   | j	rn|| j	v rn| j	| }|j
| || dg| d}| tjkr]dnd}t|tj||d| j| j ntd	| d
 Y d S Y z|| |  d}	W n*   |j
|  dtjjj| tjjjd}	t|	tj| dd| j| j Y ||d}
|	d ur|	|
d< || j| jd}tjjj|d< d|i}|jd||
|d}| || | || |	d ur| |	| | || |j|  d| | | d}||d}
|	d ur|	|
d< d| j| jd}tjjj|d< |jd||
d|id}| || | || |	d urM| |	| | || |S )Nr   r   r"  r   r   r   r   r   zCannot find the target node z% in scope, so skip adding quant node.r  r  r  r  )r   r   r  r   r  r  r   r&  r  )!r   r$   r   r   r   r   r0   r   r  r  r   r   r   r.   r"   r   rQ   rR   r  r  r   r   r   r   r  r   r  r  r   r   r   r   r   )rj   r3   r'   r5   r   r   r   r   r   r  r1   r   r   r   r   r  r,   r,   r-   r  D  s   






z8AddQuantDequantForInferencePass._insert_quant_dequant_op)r,  r-  r.  r/  ru   r   r  r  r,   r,   r,   r-   r    s    
1r  c                   @   s>   e Zd ZdZ		dddZdd Zdd	 Zd
d Zdd ZdS )AddQuantDequantForResidualz_
    Quantize the residual connections. Add quant and dequant ops for the residual inputs.
    r9   Tc                 C   sD   t || _|| _|| _|| _| jdusJ d| jdus J ddS )a  
        Args:
            scope(static.Scope): The scope is used to initialize these new parameters.
            place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
                If it's string, it can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
            quant_bits(int, optional): quantization bit number for weight. Default is 8.
            is_test(bool, optional): Whether quantization with training or not. Default is True.
        Nr  r  )r   rR   rQ   r  re   )rj   r)   r*   r   rs   r,   r,   r-   ru     s   
z#AddQuantDequantForResidual.__init__c                 C   s   t |ts	J d| |}| |}| D ]9}| dkr q|jd  }|jd  }||v s6||v r7q|| || k rD|jd n|jd }| ||| qdS )r  rw   r  r   r   N)r!   r   _all_weight_node_names_var_name_orderr   r$   r1   _insert_quant_dequant)rj   r3   weight_var_namesvar_node_names_with_orderrN   Zfirst_input_nameZsecond_input_nameZ	skip_noder,   r,   r-   r     s*   


z AddQuantDequantForResidual.applyc                 C   sR   dd |  D }| D ]}| dkr&|jd  r&||jd   q|S )zM
        Return a list of weight variables (including casted weight)
        c                 S   rx   r,   ry   )rM   r   r,   r,   r-   rO     r  zEAddQuantDequantForResidual._all_weight_node_names.<locals>.<listcomp>r  r   )r   r   r$   r1   r2   r   r   )rj   r3   r  rN   r,   r,   r-   r    s   z1AddQuantDequantForResidual._all_weight_node_namesc                 C   sL   |  }i }t|D ]\}}|jD ]}| }||du r"|||< qq
|S )zY
        Return a dictionary with variable names as key and their order as value
        N)Ztopology_sort	enumerater1   r$   r  )rj   r3   Zordered_opsr  idxr4   Zin_var_nodeZin_var_namer,   r,   r-   r    s   
z*AddQuantDequantForResidual._var_name_orderc           
      C   sn   t | j| j| jdd| jd}| d }| d}|j||||d\}}|	||||}	|
||	| dS )zf
        Insert per tensor quantize_linear and dequantize_linear node between var_node and op
        r   F)r   r   r  rs   z.skipr   r  N)r  rR   rQ   r  re   r$   rN   r   r  r  r   )
rj   r3   r'   rN   r  Zquant_var_namer   r   r   r   r,   r,   r-   r    s(   z0AddQuantDequantForResidual._insert_quant_dequantN)r9   T)	r,  r-  r.  r/  ru   r   r  r  r  r,   r,   r,   r-   r    s    
r  )2r   loggingnumpyr"   r   r   r   Zbase.frameworkr   r   Z	frameworkr   r   Zstaticr	   r
   r   r   r   Z
log_helperr   r  Zquant_configr   r   r   r,  INFOr  r4  r6  r  Z	_conv_opsr   r.   r7   r8   r0  r~  r  r  r  r  r  r  r  r  r  r  r  r,   r,   r,   r-   <module>   s|   
       e  o\* .H       : M )  Q