from __future__ import annotations

import warnings
from typing import TYPE_CHECKING

from paddle import _C_ops, pir

from ..base import framework
from ..base.dygraph import no_grad
from ..base.framework import in_dynamic_or_pir_mode
from .optimizer import Optimizer

if TYPE_CHECKING:
    from collections.abc import Sequence

    from paddle import Tensor
    from paddle.nn.clip import GradientClipBase
    from paddle.regularizer import WeightDecayRegularizer

    from .lr import LRScheduler
    from .optimizer import _ParameterConfig

__all__ = []


class SGD(Optimizer):
    r"""
    Optimizer of the stochastic gradient descent algorithm.

    .. math::

        param\_out = param - learning\_rate * grad
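
    For a single parameter tensor, one ``step()`` call applies exactly this rule
    (a minimal sketch of the arithmetic only; the actual update additionally applies
    any regularization and gradient clipping configured on the optimizer):

    .. code-block:: python

        >>> import paddle

        >>> param = paddle.to_tensor([1.0, 2.0])
        >>> grad = paddle.to_tensor([0.5, -0.5])
        >>> learning_rate = 0.1
        >>> param_out = param - learning_rate * grad  # -> [0.95, 2.05]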

    Parameters:
        learning_rate (float|LRScheduler, optional): The learning rate used to update ``Parameter``.
            It can be a float value or an ``LRScheduler`` instance. The default value is 0.001.
        parameters (list|tuple|None, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
            The default value is None in static graph mode; in that case all parameters will be updated.
        weight_decay (int|float|WeightDecayRegularizer|None, optional): The strategy of regularization. \
            It can be an int or float value used as the coefficient of L2 regularization, or a \
            regularizer such as :ref:`api_paddle_regularizer_L1Decay` or :ref:`api_paddle_regularizer_L2Decay`.
            If a parameter has already set a regularizer using :ref:`api_paddle_ParamAttr`, \
            the regularization setting here in the optimizer will be ignored for that parameter. \
            Otherwise, the regularization setting here in the optimizer will take effect. \
            Default None, meaning there is no regularization.
        grad_clip (GradientClipBase|None, optional): Gradient clipping strategy, it's an instance of
            some derived class of ``GradientClipBase`` . There are three clipping strategies
            ( :ref:`api_paddle_nn_ClipGradByGlobalNorm` , :ref:`api_paddle_nn_ClipGradByNorm` ,
            :ref:`api_paddle_nn_ClipGradByValue` ). Default None, meaning there is no gradient clipping.
        multi_precision (bool, optional): Whether to keep a float32 master copy of each parameter and use \
            it during weight updating; mainly useful when training with float16/bfloat16 parameters. \
            Default is False.
        name (str|None, optional): The default value is None. Normally there is no need for users
            to set this property. For more information, please refer to
            :ref:`api_guide_Name` .

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> inp = paddle.uniform(min=-0.1, max=0.1, shape=[10, 10], dtype='float32')
            >>> linear = paddle.nn.Linear(10, 10)
            >>> inp = paddle.to_tensor(inp)
            >>> out = linear(inp)
            >>> loss = paddle.mean(out)
            >>> sgd = paddle.optimizer.SGD(
            ...     learning_rate=0.1,
            ...     parameters=linear.parameters(),
            ...     weight_decay=0.01
            ... )
            >>> out.backward()
            >>> sgd.step()
            >>> sgd.clear_grad()
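
        A second, illustrative sketch (``paddle.optimizer.lr.StepDecay`` and
        ``paddle.nn.ClipGradByNorm`` are just example choices here) showing how a
        learning-rate scheduler and a gradient-clipping strategy plug into the
        same optimizer:

        .. code-block:: python

            >>> import paddle

            >>> linear = paddle.nn.Linear(10, 10)
            >>> scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.1, step_size=2, gamma=0.5)
            >>> sgd = paddle.optimizer.SGD(
            ...     learning_rate=scheduler,
            ...     parameters=linear.parameters(),
            ...     grad_clip=paddle.nn.ClipGradByNorm(clip_norm=1.0)
            ... )
            >>> loss = paddle.mean(linear(paddle.rand([10, 10])))
            >>> loss.backward()
            >>> sgd.step()
            >>> scheduler.step()
            >>> sgd.clear_grad()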

    """

    type: str

    def __init__(
        self,
        learning_rate: float | LRScheduler = 0.001,
        parameters: Sequence[Tensor] | Sequence[_ParameterConfig] | None = None,
        weight_decay: float | WeightDecayRegularizer | None = None,
        grad_clip: GradientClipBase | None = None,
        multi_precision: bool = False,
        name: str | None = None,
    ) -> None:
        if learning_rate is None:
            raise ValueError("learning_rate is not set")
        super().__init__(
            learning_rate=learning_rate,
            parameters=parameters,
            weight_decay=weight_decay,
            grad_clip=grad_clip,
            name=name,
        )
        self.type = "sgd"
        self._multi_precision = multi_precision
        self._master_weights = {}
zSGD.__init__c                 C  s   t |tjtjfsJ t |tr| |}|D ]-}|j| jv r q| jr6| 	|j
r6| |}| j|j q| 	|j
rD| jsDtd qd S )NzAccumulating with FP16/BF16 in optimizer can lead to poor accuracy or slow convergence.Consider using multi_precision=True option of the Adam optimizer.)
isinstancer   Blockr   dict_update_param_groupr    Z_already_create_accumulatorr'   _is_dtype_fp16_or_bf16dtypeZ_create_master_weightaddwarningswarn)r)   blockr   pZmaster_pr,   r,   r-   _create_accumulatorss   s&   



zSGD._create_accumulatorsc           
      C  s   t |tr
| |}| jo| |d j}|r| j|d j nd }| |}t	 r8t
|d ||d || d S t |tjs@J |d |d |d}d|d i}d|i}|r^||d< ||d< |j| j|||dd	}	|	S )
Nr   r
   )ParamZGradZLearningRateZParamOutr   ZMasterParamZMasterParamOutT)r   inputsoutputsattrsZstop_gradient)r.   r0   r1   r'   r2   r3   r(   r    Z_create_param_lrr	   r   Zsgd_r   r/   Z	append_opr   )
r)   r7   Zparam_and_gradZfind_masterZmaster_weightlrr;   r<   r=   Zsgd_opr,   r,   r-   _append_optimize_op   sJ   



zSGD._append_optimize_opc                 C  s   | d}|S )Nparams)get)r)   r   r,   r,   r-   r1      s   
zSGD._update_param_group)r   NNNFN)r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   )__name__
__module____qualname____doc____annotations__r&   r9   r   r?   r1   __classcell__r,   r,   r*   r-   r   (   s   
 2
2r   )
__future__r   r5   typingr   Zpaddler   r   baser   Zbase.dygraphr   Zbase.frameworkr	   Z	optimizerr   collections.abcr   r   Zpaddle.nn.clipr   Zpaddle.regularizerr   r>   r   r   __all__r   r,   r,   r,   r-   <module>   s"   