o
    piI                     @  s   d Z ddlmZ ddlZddlZddlmZmZ ddlmZ ddl	m
Z
mZ ddlmZ dddZdddZG dd deZG dd deZG dd deZG dd deejdZdS )zEDefine some layers used to export quantization model with ONNX style.    )annotationsN)_C_ops_legacy_C_ops)unique_name)in_dynamic_modein_pir_mode   )Layere4m3c                 C  s   |dkrdgt | j }| ||< ||}| d}|dkr2t|d | ddd| jS |dkrHt|d	 | d
d	d| jS t	d)Nr      float32r     @Zfloat8_e4m3fne5m2     Zfloat8_e5m2only support e4m3 or e5m2 now)
lenshapenumelreshapeastypepaddlecastclipdtypeNotImplementedError)inputscaleaxistyper   inp r#   ]/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/paddle/nn/quant/format.pyfake_fp8_quant   s$   

r%   c                 C  sz   |dkrdgt | j }| ||< ||}|dkr(| dd | | jS |dkr9| dd | | jS td)	Nr   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   r    r!   r   r#   r#   r$   fake_fp8_dequant0   s   
r&   c                      s0   e Zd Z fddZdd Zedd Z  ZS )LinearQuanterDequanterc                   s   t    || _|| _d S N)super__init___quanter
_dequanter)selfquanterZ	dequanter	__class__r#   r$   r*   ?   s   

zLinearQuanterDequanter.__init__c                 C  s0   |}| j d ur|  |}| jd ur| |}|S r(   )r+   r,   )r-   r   outr#   r#   r$   forwardD   s   



zLinearQuanterDequanter.forwardc                 C  s"   | d usJ t t| t| S r(   )r'   LinearQuanterfrom_quanterLinearDequanterr.   r#   r#   r$   r4   L   s
   z#LinearQuanterDequanter.from_quanter__name__
__module____qualname__r*   r2   staticmethodr4   __classcell__r#   r#   r/   r$   r'   >   s
    r'   c                      :   e Zd Z				d
 fdd	Zdd Zedd	 Z  ZS )r3   N      c                     t    tj|dd}tjjtjjdtj	j
ddd}| j|j|dd| _| j| tjddd| _tjddd| _|d urE|n|}tj|dd}tjjtjjd	tj	j
ddd}| j|j|dd| _| j| |d u rwd
n|| _|| _|| _t| jtr| jd dkr| jd dkrt| jdkrd| _d| _n2| jd dkr| jd dkrt| jdkrd| _d| _ntdd| jd > d | _d
| j d | _t| jtr| jd | jd  d | _d S d S Nr   )r   zquant_dequant.scaleg      ?F)nameinitializerZ	trainable)r   attrr           zquant_dequant.zero_pointr
   r      r      r   r   r      r   r   zCurrently, only float8_e4m3 and float8_e5m2 formats are supported. Please set quant_bits to (4,3) or (5,2) for the corresponding format.r)   r*   r   Z	to_tensorZ	frameworkZ	ParamAttrutilsr   generatennrC   ConstantZcreate_parameterr   _scales	set_valuein_accumin_state_zero_point_quant_axis_bit_length_group_size
isinstancetupler   _qmin_qmaxr   r-   scales
zero_point
quant_axis
bit_lengthZ
group_sizeZ
scale_attrZzp_attrr/   r#   r$   r*   V   \   
zLinearQuanter.__init__c           
      C  s  t  r| jdkrt|| j| jddS | jdkr!t|| j| jddS t| jjdkrt| j dkrFt	
t	|d| j | j | j| j}n(t	| j| jd}t	| j| jd}t	
t	|d| | j | | j| j}||jS t|d| j| jd	| jd
| jd| jd| j|jS t rd|_t	jjjd|jtdt	jjddd}t|| j| j| j| j | j| j| j| jddd\}}}}|S | j!"|j}	| j!j#d|| j| jdd|	i| j| j| j| jdd |	S )Nr   r   r!   r   r   r   r   r   r]   r^   qminqmaxT	quant_outrE   r   r   rB   rC   stop_gradientFquantize_linearXZScaleZ	ZeroPointYr]   r^   ra   rb   r!   ZinputsZoutputsattrs)$r   rY   r%   rN   rS   r   r   rR   sumr   r   roundr   rX   repeat_interleaverU   r   r   rf   rT   r   re   pircorecreate_persistable_valuer   rK   rL   rC   rM   r   rP   rQ   _helper"create_variable_for_type_inference	append_op)
r-   r   Zquant_weightnew_snew_zprc   	out_state	out_accum	out_scaler1   r#   r#   r$   r2      s   



zLinearQuanter.forwardc                 C      t |  |  |  |  dS N)r\   r]   r^   )r3   r[   zero_pointsr]   r^   r6   r#   r#   r$   r4         zLinearQuanter.from_quanterNNr>   r?   r7   r#   r#   r/   r$   r3   U   s    <Zr3   c                      r=   )r5   Nr>   r?   c                   r@   rA   rI   rZ   r/   r#   r$   r*      r_   zLinearDequanter.__init__c           
      C  s  t  r|| jdkrt|| j| jddS | jdkr!t|| j| jddS t| jjdkr`| j dkr<|	d| j | j }nt
| j| jd}t
| j| jd}|	d| | j | }|	|jS t|	d| j| jd	| jd
| jd| jd| j	|jS t rd|_t
jjjd|jtdt
jjddd}t|| j| j| j| j| j| j| j| jddd\}}}}|S | j |j}	| jj!d|| j| jdd|	i| j| j| j| jdd |	S )Nr   r   r`   r   r   r   r   r   r]   r^   ra   rb   Trc   rE   rd   Fdequantize_linearrg   ri   rj   rk   )"r   rY   r&   rN   rS   r   r   rR   rm   r   r   ro   rU   r   r   r   rT   rX   r   re   rp   rq   rr   r   rK   rL   rC   rM   r   rP   rQ   rs   rt   ru   )
r-   r   Zquant_dequant_weightrv   rw   Zdequant_outrx   ry   rz   r1   r#   r#   r$   r2   3  s   



	
zLinearDequanter.forwardc                 C  r{   r|   )r5   r[   r}   r]   r^   r6   r#   r#   r$   r4     r~   zLinearDequanter.from_quanterr   r7   r#   r#   r/   r$   r5      s    <Vr5   c                      s\   e Zd ZdZ fddZejdddZejdd	d
ZdddZ	dd Z
dddZ  ZS )ConvertibleQuantedLayera  Abstract class to help convert quantized layer to inference model.
    It defines some functions to convert quantizers and observers to quantize
    or dequantize operators that maintain the quantization parameters used
    during inference.

    Examples:
        .. code-block:: python

            >>> # Given codes in ./customized_quanter.py
            >>> class CustomizedQuantedLayer(ConvertibleQuantedLayer):
            ...     def __init__(self):
            ...         super().__init__()
            ...         self.weight_a = paddle.create_parameter(shape=[1], dtype='float32')
            ...         self.weight_b = paddle.create_parameter(shape=[1], dtype='float32')
            ...         self.quanter_for_weight_a = None
            ...         self.activation_weight = None
            ...
            ...     def forward(self, input):
            ...         qweight_a = self.quanter_for_weight_a(self.weight_a)
            ...         weight_b = self.weight_b
            ...         qinput = self.activation_weight(input)
            ...         # compute with qweight_a, weight_b and qinput.
            ...         return qweight * qinput + weight_b
            ...
            ...     def weights_to_quanters(self):
            ...         return [('weight_a', 'quanter_for_weight_a')]
            ...
            ...     def activation_quanters(self):
            ...         return ['activation_weight']
    c                   s   t    d| _d S )NF)r)   r*   	convertedr-   r/   r#   r$   r*     s   

z ConvertibleQuantedLayer.__init__returnlist[tuple[str, str]]c                 C     dS )u  Get the name pairs of weights to be quantized and their corresponding
        quantizers. In the convert function of this abstract class, it will call
        the ‘weights_to_quanters’ function and do something as follows:
        For each pair, the quantizer will be converted to a quantize operator and
        a dequantize operator. Then, the weight will be quantized by the quantize
        operator. Finally, the quantize operator will be removed and the weights
        will be stored in integer data type.

        Returns: A list of name pairs. Each pair contains two names. The first is name of weight
        to be quantized and the second is name of corresponding quanter.
        Nr#   r   r#   r#   r$   weights_to_quanters  s   z+ConvertibleQuantedLayer.weights_to_quanters	list[str]c                 C  r   )zGet the names of quanters used to quantize activations.
        All the quanters or observers returned by this function will be converted to quantize
        and dequantize operators for deployment.
        Returns: A list of quanter names.
        Nr#   r   r#   r#   r$   activation_quanters  s   z+ConvertibleQuantedLayer.activation_quantersr'   c                 C  sH   t | |sdS t| |}|du rdS t|}t| || || j|< |S )z9Convert quanter to an instance of LinearQuanterDequanter.N)hasattrgetattrr'   r4   setattr_sub_layers)r-   quanter_namer.   r#   r#   r$   _convert_quanter_to_qdq  s   



z/ConvertibleQuantedLayer._convert_quanter_to_qdqc                 C  s    t | |}||}|| dS )z%Quantize the weight by given quanter.N)r   rO   )r-   weight_namer.   weightZqweightr#   r#   r$   _quant_weights  s   
z&ConvertibleQuantedLayer._quant_weightsFc                 C  sz   | j rJ d|  D ] \}}| |}|dur+|du r+| ||j d|_d|jd< q|  D ]}| | q0d| _ dS )z2Convert current layer to onnx style for inference.z(The model should be converted only once.NFr+   T)r   r   r   r   r+   r   r   )r-   Zremain_weightr   r   Zqdqr#   r#   r$   _convert  s   


z ConvertibleQuantedLayer._convert)r   r   )r   r   )r   r'   )F)r8   r9   r:   __doc__r*   abcabstractmethodr   r   r   r   r   r<   r#   r#   r/   r$   r     s    
r   )	metaclass)r
   r   )r   
__future__r   r   r   r   r   Zpaddle.baser   Zpaddle.frameworkr   r   Zlayer.layersr	   r%   r&   r'   r3   r5   ABCMetar   r#   r#   r#   r$   <module>   s    

 " 