o
    + i9<                     @  s   d dl mZ d dlZd dlZd dlmZ ddlmZmZmZ ddl	m
Z
 ddlmZmZmZ dd	lmZ g ZG d
d deZG dd deZG dd deZdS )    )annotationsN)_C_ops   )core	frameworkunique_name)check_variable_and_dtype)_current_expected_placein_dygraph_modein_pir_mode   )Initializerc                      s<   e Zd ZdZ					dd fddZ	ddddZ  ZS )XavierInitializera  
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio.

    This initializer is designed to keep the scale of the gradients
    approximately same in all the layers. In case of Uniform distribution,
    the range is [-x, x], where

    .. math::

        x = gain \times \sqrt{\\frac{6.0}{fan\_in + fan\_out}}

    In case of Normal distribution, the mean is 0 and the standard deviation
    is

    .. math::

       gain \times \sqrt{\\frac{2.0}{fan\_in + fan\_out}}


    Args:
        uniform (bool, optional): whether to use uniform ,if False use normal distribution. Default is True.
        fan_in (float|None, optional): fan_in for Xavier initialization. If None, it is
                inferred from the variable. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization. If None, it is
                 inferred from the variable. Default is None.
        seed (int, optional): Random seed. Default is 0.
        gain (float, optional): Scaling Tensor. Default is 1.0.

    Note:
        It is recommended to set fan_in and fan_out to None for most cases.

    TNr         ?uniformboolfan_infloat | Nonefan_outseedintgainfloatreturnNonec                   sD   |d usJ |d usJ t    || _|| _|| _|| _|| _d S N)super__init___uniform_fan_in_fan_out_seed_gain)selfr   r   r   r   r   	__class__ h/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/nn/initializer/xavier.pyr   G   s   

zXavierInitializer.__init__varpaddle.Tensorblockpaddle.pir.Block | Nonepaddle.Tensor | Nonec              
   C  sJ  |  |}t|tjtjjfsJ t|tjjjs"t|dg dd | 	|\}}| j
du r0|n| j
}| jdu r:|n| j}| jdkrG|jj| _t|tjrT| rT|jn|j}|j}|tjjjksk|tjjjkr| jstjjj}	|jtdd|jdg||	tjjjdd	}
n|tj j!tj j"fv r| jstj j#}	|}
n|}	|}
t$ r1| jrd||fv rd
}n| j%t&'dt(||   }t)*||	| || jt+ }
n%d||fv rd
}n| j%t&'dt(||   }t+ }t),|d
|| j|	|}
|tjjjks|tjjjtj j!tj j"fv r| jst)-|
|}
t|tjr*| r*tj.j/j01|
|j2|j3}
|
4| dS t5 r| jrad||fv rCd
}n| j%t&'dt(||   }tj6*|
j|	| || jt+ }
n%d||fv rkd
}n| j%t&'dt(||   }t),|
jd
|| j|	t+ }
|tj j!tj j"fv r| jst)-|
|S |
S | jrd||fv rd
}n| j%t&'dt(||   }|j7di d|
i|
j|	| || jddd}n,d||fv rd
}n| j%t&'dt(||   }|j7dd|
i|
j|
jd
|| jddd}|tjjjks|tjjjkr | js |j7dd|
id|i|
j|dd ||_8|S )a]  Initialize the input tensor with Xavier initialization.

        Args:
            var(Tensor): Tensor that needs to be initialized.
            block(Block|None, optional): The block in which initialization ops
                   should be added. Used in static graph only, default None.

        Returns:
            The initialization op
        ZOut)Zuint16Zfloat16Zfloat32Zfloat64Zxavier_initNr   .tmpF)nameshapedtypetypeZpersistableg        g      @g       @Zuniform_random)r0   r1   minmaxr   T)r2   inputsoutputsattrsstop_gradientZgaussian_random)r0   r1   meanstdr   )r2   r6   r7   r8   castX)Zin_dtype	out_dtype)r2   r5   r6   r7   )9Z_check_block
isinstancer   ZBlockpaddleZpirr   ZParameterMetar   Z_compute_fansr   r    r!   programZrandom_seedZEagerParamBaseZis_distZ_local_shaper0   r1   ZVarDescZVarTypeZFP16ZBF16r   ZFP32Z
create_varr   generatejoinr/   ZDENSE_TENSORZDataTypeZFLOAT16ZBFLOAT16ZFLOAT32r
   r"   mathsqrtr   r   r   r	   Zgaussianr;   distributedZauto_parallelapiZdtensor_from_localZprocess_meshZ
placementsZ_share_underline_tensor_tor   Z_pir_opsZ	append_opop)r#   r(   r*   Zf_inZf_outr   r   Zout_var_shapeZorigin_dtyper=   Zout_varlimitr:   ZplacerG   r&   r&   r'   forwardX   s>  





		


	

zXavierInitializer.forward)TNNr   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   )r(   r)   r*   r+   r   r,   )__name__
__module____qualname____doc__r   rI   __classcell__r&   r&   r$   r'   r   "   s    &r   c                      ,   e Zd ZdZ				dd fddZ  ZS )XavierNormala	  
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio, using a normal distribution whose mean is :math:`0` and standard deviation is

    .. math::

        gain \times \sqrt{\frac{2.0}{fan\_in + fan\_out}}.


    Args:
        fan_in (float|None, optional): fan_in for Xavier initialization, which is
                inferred from the Tensor. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization, which is
                 inferred from the Tensor. Default is None.
        gain (float, optional): Scaling Tensor. Default is 1.0.
        name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

    Returns:
        A parameter initialized by Xavier weight, using a normal distribution.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.seed(1)
            >>> data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            >>> weight_attr = paddle.framework.ParamAttr(
            ...     name="linear_weight",
            ...     initializer=paddle.nn.initializer.XavierNormal())
            >>> bias_attr = paddle.framework.ParamAttr(
            ...     name="linear_bias",
            ...     initializer=paddle.nn.initializer.XavierNormal())
            >>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            >>> print(linear.weight)
            Parameter containing:
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[-0.21607460,  0.08382989],
             [ 0.29147008, -0.07049121]])

            >>> print(linear.bias)
            Parameter containing:
            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [1.06076419, 0.87684733])

            >>> res = linear(data)
            >>> print(res)
            Tensor(shape=[3, 1, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[[1.13615966, 0.89018601]],
             [[1.13615966, 0.89018601]],
             [[1.13615966, 0.89018601]]])
    Nr   r   r   r   r   r   r/   
str | Noner   r   c                      t  jd||d|d d S )NFr   r   r   r   r   r   r   r   r#   r   r   r   r/   r$   r&   r'   r   ^     

zXavierNormal.__init__NNr   N
r   r   r   r   r   r   r/   rQ   r   r   rJ   rK   rL   rM   r   rN   r&   r&   r$   r'   rP   '  s    8rP   c                      rO   )XavierUniforma	  
    This class implements the Xavier weight initializer from the paper
    `Understanding the difficulty of training deep feedforward neural
    networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
    by Xavier Glorot and Yoshua Bengio.

    This initializer is designed to keep the scale of the gradients
    approximately same in all the layers. In case of Uniform distribution,
    the range is :math:`[-x,x]`, where

    .. math::

        x = gain \times \sqrt{\frac{6.0}{fan\_in + fan\_out}}.

    Args:
        fan_in (float|None, optional): fan_in for Xavier initialization, which is
                inferred from the Tensor. Default is None.
        fan_out (float|None, optional): fan_out for Xavier initialization, which is
                 inferred from the Tensor. Default is None.
        gain (float, optional): Scaling Tensor. Default is 1.0.
        name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

    Returns:
        A parameter initialized by Xavier weight, using a uniform distribution.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.seed(1)
            >>> data = paddle.ones(shape=[3, 1, 2], dtype='float32')
            >>> weight_attr = paddle.framework.ParamAttr(
            ...     name="linear_weight",
            ...     initializer=paddle.nn.initializer.XavierUniform())
            >>> bias_attr = paddle.framework.ParamAttr(
            ...     name="linear_bias",
            ...     initializer=paddle.nn.initializer.XavierUniform())
            >>> linear = paddle.nn.Linear(2, 2, weight_attr=weight_attr, bias_attr=bias_attr)
            >>> print(linear.weight)
            Parameter containing:
            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[-1.18095720,  0.64892638],
             [ 0.43125069, -1.11156428]])
            >>> print(linear.bias)
            Parameter containing:
            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [-0.27524316,  1.13808715])

            >>> res = linear(data)
            >>> print(res)
            Tensor(shape=[3, 1, 2], dtype=float32, place=Place(cpu), stop_gradient=False,
            [[[-1.02494967,  0.67544925]],
             [[-1.02494967,  0.67544925]],
             [[-1.02494967,  0.67544925]]])
    Nr   r   r   r   r   r   r/   rQ   r   r   c                   rR   )NTr   rS   rT   rU   r$   r&   r'   r     rV   zXavierUniform.__init__rW   rX   rY   r&   r&   r$   r'   rZ   j  s    :rZ   )
__future__r   rC   r?   r   baser   r   r   Zbase.data_feederr   Zbase.frameworkr	   r
   r   Zinitializerr   __all__r   rP   rZ   r&   r&   r&   r'   <module>   s     C