o
    + i9<  ã                   @  s    d dl mZ d dlZd dlZd dlmZ ddlmZmZmZ ddl	m
Z
 ddlmZmZmZ dd	lmZ g ZG d
d„ deƒZG dd„ deƒZG dd„ deƒZdS )é    )ÚannotationsN)Ú_C_opsé   )ÚcoreÚ	frameworkÚunique_name)Úcheck_variable_and_dtype)Ú_current_expected_placeÚin_dygraph_modeÚin_pir_modeé   )ÚInitializerc                      s<   e Zd ZdZ					dd‡ fdd„Z	dddd„Z‡  ZS )ÚXavierInitializera  
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio.

    This initializer is designed to keep the scale of the gradients
    approximately same in all the layers. In case of Uniform distribution,
    the range is [-x, x], where

    .. math::

        x = gain \times \sqrt{\\frac{6.0}{fan\_in + fan\_out}}

    In case of Normal distribution, the mean is 0 and the standard deviation
    is

    .. math::

       gain \times \sqrt{\\frac{2.0}{fan\_in + fan\_out}}


    Args:
        uniform (bool, optional): whether to use uniform ,if False use normal distribution. Default is True.
        fan_in (float|None, optional): fan_in for Xavier initialization. If None, it is
                inferred from the variable. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization. If None, it is
                 inferred from the variable. Default is None.
        seed (int, optional): Random seed. Default is 0.
        gain (float, optional): Scaling Tensor. Default is 1.0.

    Note:
        It is recommended to set fan_in and fan_out to None for most cases.

    TNr   ç      ð?ÚuniformÚboolÚfan_inúfloat | NoneÚfan_outÚseedÚintÚgainÚfloatÚreturnÚNonec                   sD   |d usJ ‚|d usJ ‚t ƒ  ¡  || _|| _|| _|| _|| _d S ©N)ÚsuperÚ__init__Ú_uniformÚ_fan_inÚ_fan_outÚ_seedÚ_gain)Úselfr   r   r   r   r   ©Ú	__class__© úh/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/nn/initializer/xavier.pyr   G   s   

zXavierInitializer.__init__Úvarúpaddle.TensorÚblockúpaddle.pir.Block | Noneúpaddle.Tensor | Nonec              
   C  sJ  |   |¡}t|tjtjjfƒsJ ‚t|tjjjƒs"t|dg d¢dƒ |  	|¡\}}| j
du r0|n| j
}| jdu r:|n| j}| jdkrG|jj| _t|tjƒrT| ¡ rT|jn|j}|j}|tjjjksk|tjjjkrˆ| jsˆtjjj}	|jt d d|jdg¡¡||	tjjjdd	}
n|tj j!tj j"fv rœ| jsœtj j#}	|}
n|}	|}
t$ƒ r1| jrÌd||fv r°d
}n| j%t& 'dt(|| ƒ ¡ }t) *||	| || jt+ƒ ¡}
n%d||fv rÕd
}n| j%t& 'dt(|| ƒ ¡ }t+ƒ }t) ,|d
|| j|	|¡}
|tjjjks|tjjjtj j!tj j"fv r| jst) -|
|¡}
t|tjƒr*| ¡ r*tj.j/j0 1|
|j2|j3¡}
|
 4|¡ dS t5ƒ r| jrad||fv rCd
}n| j%t& 'dt(|| ƒ ¡ }tj6 *|
j|	| || jt+ƒ ¡}
n%d||fv rkd
}n| j%t& 'dt(|| ƒ ¡ }t) ,|
jd
|| j|	t+ƒ ¡}
|tj j!tj j"fv r›| js›t) -|
|¡S |
S | jrÏd||fv r«d
}n| j%t& 'dt(|| ƒ ¡ }|j7di d|
i|
j|	| || jdœdd}n,d||fv rÙd
}n| j%t& 'dt(|| ƒ ¡ }|j7dd|
i|
j|
jd
|| jdœdd}|tjjjks|tjjjkr | js |j7dd|
id|i|
j|dœd ||_8|S )a]  Initialize the input tensor with Xavier initialization.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block|None, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The initialization op
        ZOut)Zuint16Zfloat16Zfloat32Zfloat64Zxavier_initNr   Ú.ÚtmpF)ÚnameÚshapeÚdtypeÚtypeZpersistableg        g      @g       @Zuniform_random)r0   r1   ÚminÚmaxr   T)r2   ÚinputsÚoutputsÚattrsÚstop_gradientZgaussian_random)r0   r1   ÚmeanÚstdr   )r2   r6   r7   r8   ÚcastÚX)Zin_dtypeÚ	out_dtype)r2   r5   r6   r7   )9Z_check_blockÚ
isinstancer   ZBlockÚpaddleZpirr   ZParameterMetar   Z_compute_fansr   r    r!   ÚprogramZrandom_seedZEagerParamBaseZis_distZ_local_shaper0   r1   ZVarDescZVarTypeZFP16ZBF16r   ZFP32Z
create_varr   ÚgenerateÚjoinr/   ZDENSE_TENSORZDataTypeZFLOAT16ZBFLOAT16ZFLOAT32r
   r"   ÚmathÚsqrtr   r   r   r	   Zgaussianr;   ÚdistributedZauto_parallelÚapiZdtensor_from_localZprocess_meshZ
placementsZ_share_underline_tensor_tor   Z_pir_opsZ	append_opÚop)r#   r(   r*   Zf_inZf_outr   r   Zout_var_shapeZorigin_dtyper=   Zout_varÚlimitr:   ZplacerG   r&   r&   r'   ÚforwardX   s>  
ü


ÿÿý
ÿù
ÿÿú	ú	ýÿú

ÿÿ
ÿú	ú
ÿÿûõûöþü
zXavierInitializer.forward)TNNr   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   )r(   r)   r*   r+   r   r,   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   rI   Ú__classcell__r&   r&   r$   r'   r   "   s    &úÿr   c                      ó,   e Zd ZdZ				dd‡ fdd„Z‡  ZS )ÚXavierNormala	  
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio, using a normal distribution whose mean is :math:`0` and standard deviation is

    .. math::

        gain \times \sqrt{\frac{2.0}{fan\_in + fan\_out}}.


    Args:
        fan_in (float|None, optional): fan_in for Xavier initialization, which is
                inferred from the Tensor. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization, which is
                 inferred from the Tensor. Default is None.
        gain (float, optional): Scaling Tensor. Default is 1.0.
        name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

    Returns:
        A parameter initialized by Xavier weight, using a normal distribution.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.seed(1)
            >>> data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            >>> weight_attr = paddle.framework.ParamAttr(
            ...     name="linear_weight",
            ...     initializer=paddle.nn.initializer.XavierNormal())
            >>> bias_attr = paddle.framework.ParamAttr(
            ...     name="linear_bias",
            ...     initializer=paddle.nn.initializer.XavierNormal())
            >>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            >>> print(linear.weight)
            Parameter containing:
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[-0.21607460,  0.08382989],
             [ 0.29147008, -0.07049121]])

            >>> print(linear.bias)
            Parameter containing:
            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [1.06076419, 0.87684733])

            >>> res = linear(data)
            >>> print(res)
            Tensor(shape=[3, 1, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[[1.13615966, 0.89018601]],
             [[1.13615966, 0.89018601]],
             [[1.13615966, 0.89018601]]])
    Nr   r   r   r   r   r   r/   ú
str | Noner   r   c                   ó   t ƒ jd||d|d d S )NFr   ©r   r   r   r   r   ©r   r   ©r#   r   r   r   r/   r$   r&   r'   r   ^  ó   

ÿzXavierNormal.__init__©NNr   N©
r   r   r   r   r   r   r/   rQ   r   r   ©rJ   rK   rL   rM   r   rN   r&   r&   r$   r'   rP   '  s    8ûrP   c                      rO   )ÚXavierUniforma†	  
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio.

    This initializer is designed to keep the scale of the gradients
    approximately same in all the layers. In case of Uniform distribution,
    the range is :math:`[-x,x]`, where

    .. math::

        x = gain \times \sqrt{\frac{6.0}{fan\_in + fan\_out}}.

    Args:
        fan_in (float|None, optional): fan_in for Xavier initialization, which is
                inferred from the Tensor. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization, which is
                 inferred from the Tensor. Default is None.
        gain (float, optional): Scaling Tensor. Default is 1.0.
        name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

    Returns:
        A parameter initialized by Xavier weight, using a uniform distribution.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.seed(1)
            >>> data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            >>> weight_attr = paddle.framework.ParamAttr(
            ...     name="linear_weight",
            ...     initializer=paddle.nn.initializer.XavierUniform())
            >>> bias_attr = paddle.framework.ParamAttr(
            ...     name="linear_bias",
            ...     initializer=paddle.nn.initializer.XavierUniform())
            >>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            >>> print(linear.weight)
            Parameter containing:
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[-1.18095720,  0.64892638],
             [ 0.43125069, -1.11156428]])
            >>> print(linear.bias)
            Parameter containing:
            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [-0.27524316,  1.13808715])

            >>> res = linear(data)
            >>> print(res)
            Tensor(shape=[3, 1, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[[-1.02494967,  0.67544925]],
             [[-1.02494967,  0.67544925]],
             [[-1.02494967,  0.67544925]]])
    Nr   r   r   r   r   r   r/   rQ   r   r   c                   rR   )NTr   rS   rT   rU   r$   r&   r'   r   £  rV   zXavierUniform.__init__rW   rX   rY   r&   r&   r$   r'   rZ   j  s    :ûrZ   )Ú
__future__r   rC   r?   r   Úbaser   r   r   Zbase.data_feederr   Zbase.frameworkr	   r
   r   Zinitializerr   Ú__all__r   rP   rZ   r&   r&   r&   r'   Ú<module>   s     C