o
    + i                     @  s  d dl mZ d dlZd dlmZ d dlZd dlZd dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ dd	lmZmZmZ eeejd
dZe	jjje	jjje	jjjgZdZeG dd dZ e dddddddZ!dd Z"dd Z#dd Z$da%dd Z&dd Z'dd Z(dd  Z)d!d" Z*dNd$d%Z+d&d' Z,d(d) Z-d*d+ Z.ed,d- Z/e	jjj0e	jjj1e	jjj2e	jjj3hZ4e	jjj0e	jjj1e	jjj2hZ5d.d/ Z6d0d1 Z7d2d3 Z8d4d5 Z9d6d7 Z:d8d9 Z;dOd:d;Z<dPdAdBZ=dde	jjj1dCd#fdDdEZ>dFdG Z?dHdI Z@dde	jjj1d#i fdJdKZAdLdM ZBdS )Q    )annotationsN)	dataclass)core	frameworkglobal_scope)in_pir_mode)
get_logger)signature_safe_contextmanager   )AutoMixedPrecisionLists
black_listget_low_precision_dtypestrz&%(asctime)s-%(levelname)s: %(message)s)fmtZ__use_fp16__c                   @  s>   e Zd ZU ded< ded< ded< ded< ded< ded	< d
S )
AmpOptionsboolenablezlist[str] | Nonecustom_white_listcustom_black_liststrleveldtypeuse_promoteN)__name__
__module____qualname____annotations__ r   r   h/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/static/amp/fp16_utils.pyr   /   s   
 r   TO1float16)r   r   r   r   r   r   c                 C  s4   | j }t|tr|d }||| ||| dS )z
    If an op has old_name input and output, rename these input
    args new_name.

    Args:
        op (Operator): Current operator.
        old_name (str): The old name of input args.
        new_name (str): The new name of input args.
    r   N)desc
isinstancetuple_rename_inputZ_rename_output)opZold_namenew_nameZop_descr   r   r   _rename_argC   s
   

r&   c           	      C  sd   | j D ],}|j}|j}|D ]!}||vs||v rq|jD ]}||| v r-|||| |  qqqd S N)blocksopsidxinput_arg_namesr#   )	programZop_var_rename_mapZ
origin_opskeep_fp32_opsblockr)   Zblock_idr$   namer   r   r   _rename_op_inputT   s   

r0   c                 C  s    | t jjjt jjjfv rdS dS )zx
    Convert specific variable type to its corresponding string.

    Args:
        dtype (VarType): Variable type.
    Zfp16Zfp32)r   VarDescVarTypeFP16BF16)r   r   r   r   _dtype_to_str`   s   r5   c                  G  s>   t | dkrtS t | dkrt| d tsJ t}| d a|S )Nr   r
   )len(_keep_layer_norm_scale_bias_to_fp32_flagr!   r   )args	old_valuer   r   r   #_keep_layer_norm_scale_bias_to_fp32r   s   r:   c                 C  s   | j }|dkr|dkS |dkrt r|dkS |dkr|dkS |dkr&|dvS |dkr.|dvS |d	v r6|d
v S |dkr>|dv S dS )N
batch_normX
layer_normZinstance_normfused_bn_add_activation>   r<   Zresnet_unit>   r<   ZFilterXr?   ZFilterZZfused_attentionZfused_feedforward>   ZLn2ScaleZLn2BiasLnScaleZLn1ScaleZLn1BiasLnBiasZfused_multi_transformer>   rB   Z
FFNLnScaleZ	FFNLnBiasrC   Ftyper:   )r$   in_nameop_typer   r   r   _keep_fp32_input}   s    rH   c                 C  sP   | j }|dv r|dkS |dkrt r|dkS |dkr|dvS |dv r&|dv S dS )	N)r;   r>   Yr=   r@   >   ZConvXrI   ZConvZrA   >   ZLn1VarianceZLnMeanZLn2MeanZLn2VarianceZLn1MeanZ
LnVarianceFrD   )r$   out_namerG   r   r   r   _keep_fp32_output   s   rK   c                 C  s  d}|j D ]}|tjkrt||rq||D ]}| |}|jtvs(|j|kr)q|j	d t
| }	| |	}
|
rG|
j|krGt||j	|
j	 q|j|kr| j|	}|du s[|j|kr|d}|tjkr|jrd}|j|u rwt| j||}n|jdur|j}d}|dur|d}|durd|v r|}| j|	|d|jd}|dsttjjjn|d}| j|d	d
|id|i|j|j||dd |d7 }t||j	|j	 qqdD ]}||r||tv r||| q|S )a  
    Insert cast op and rename op's input.

    Args:
        block (Program): The block in which the operator is.
        op (Operator): The operator to insert cast op.
        idx (int): The index of current operator.
        src_dtype (VarType): The input variable dtype of cast op.
        dest_dtype (VarType): The output variable dtype of cast op.

    Returns:
        num_cast_op (int): The number of cast ops that have been inserted.
    r   z.cast_N	op_deviceallF)r/   r   Zpersistablestop_gradientop_rolecastr<   Out)in_dtype	out_dtyperL   rO   )rE   inputsoutputsattrsr
   )rR   rS   r   )input_namespaddlefloat32rH   input_find_var_recursiverE   _valid_typesr   r/   r5   r&   varsgetattrrN   r$   find_true_prev_opr)   Z
create_varhas_attrintr   op_proto_and_checker_makerOpRoleForwardZ_insert_op_without_syncFLOAT_TYPES	_set_attr)r.   r$   r*   Z	src_dtypeZ
dest_dtypenum_cast_opsrF   in_var_namein_varZ	cast_nameZ
casted_varout_varrL   prev_opZprev_op_devicerO   	attr_namer   r   r   _insert_cast_op   sz   







Ern   c                 C  st   g }| D ]}||kr n|j D ]}||D ]}||kr!|| qqq|r8t|dks4td| d|d S dS )z
    Find the true prev op that outputs var_name variable.

    Args:
        ops (list): A list of ops.
        cur_op (Operator): Current operator which has var_name variable.
        var_name (string): Variable name.
    r
   z0There must be only one previous op that outputs z	 variabler   N)output_namesoutputappendr6   
ValueError)r)   cur_opvar_namerl   r$   rJ   Zout_var_namer   r   r   r`     s(   	

r`   Fc           
      C  s   g }|r	 d}nt | D ]
\}}||kr nqt|d t| D ]}| | }|jD ]}||D ]}	|	|kr;|| q0q)q |S )a_  
    if there are post ops, return them, if there is no post op,
    return None instead.
    Args:
        ops (list): A list of ops.
        cur_op (Operator): Current operator which has var_name variable.
        var_name (string): Variable name.
        search_all (bool): The type of operator search. Use if "cur_op" is not in the "ops" set.
    r
   )	enumerateranger6   rW   rZ   rq   )
r)   rs   rt   Z
search_allZpost_opr*   r$   irF   ri   r   r   r   find_true_post_op'  s$   


ry   c                 C  s,   t |  D ]}|| |kr|  S qdS ) ru   )rw   Zop_sizer$   )Z
block_descZcur_op_descr*   r   r   r   find_op_indexJ  s
   r{   c                 C  s<   | j D ]
}||jv r dS q| jD ]
}||jv r dS qdS NTF)r+   black_varnamesoutput_arg_names)r$   	amp_listsrF   rJ   r   r   r   _is_in_black_varnamesR  s   



r   c                 C  sr   | j |v rdS g }|t| j |t| j |D ]	}d|v r$ dS q|r7| dr5t| dv r5dS dS dS )NTZlearning_rateZop_namescopeF)rE   extendlistr+   r~   ra   _fp16_guard_patternr_   )r$   Zunsupported_op_listuse_fp16_guardZin_out_arg_namesr/   r   r   r   _need_keep_fp32^  s   

r   c                   c  s:    t jtd dV  W d   dS 1 sw   Y  dS )aq  
    As for the pure fp16 training, if users set `use_fp16_guard` to True,
    only those ops created in the context manager `fp16_guard` will be
    transformed as float16 type.

    Examples:
        .. code-block:: python

            >>> import numpy as np
            >>> import paddle
            >>> import paddle.nn.functional as F
            >>> paddle.enable_static()
            >>> data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
            >>> conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)

            >>> with paddle.static.amp.fp16_guard():
            ...     bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
            ...     pool = F.max_pool2d(bn, kernel_size=2, stride=2)
            ...     hidden = paddle.static.nn.fc(pool, size=10)
            ...     loss = paddle.mean(hidden)
    )prefixN)r   Z
name_scoper   r   r   r   r   
fp16_guardy  s   "r   c           
      C  s   t  }|D ]f}d }z||}W n- ty= }	 z!td|	 d ||}|d ur3td| d W Y d }	~	nd }	~	ww |d u sG|jtvrHq|jt	v rZ|
| |rZ|j| td| j d| d|j d q|S )	Nz-- z&, try to get it in the global block --z-- var z is got in the global block --z---- op type: z, var name: z, var dtype: z ----)set_var_recursiverr   _loggerdebugvarrE   r\   r   rf   addr    	set_dtype)
r$   Z	var_namesr.   global_blockr   need_set_dtypelow_precision_var_namesrt   r   er   r   r   set_var_dst_dtype  s0   



r   c                 C  s   t  }|dks|dkr|S g }| jD ]E}||  |j}|D ]6}	|	j|jv r)q t|	|j|r?|	j	D ]
}
|
|	|
}q3q |	j	D ]}
t sUt|	|
rU|
|	|
}qBq q|D ]}|j|vrttd|j d| d |j| qZ|S )Nr   ODz-- set param  to z --.)r   r(   r   all_parametersr)   rE   r   r   unsupported_listrW   unionrZ   r   is_compiled_with_ipurH   r/   r   r   r    r   )r,   r   r   r   r   keep_fp32_var_namesr   r.   r)   r$   rF   paramr   r   r   set_param_dtype  s@   




r   c                 C  s   d}t  }t| |j|rd}||fS |jd ur"t| |r"d}||fS | j|jv rDd}| jD ]}|D ]}|j| 	|v rB|
|jg}q1q-||fS )NFT)r   r   r   r}   r   rE   r   rW   r/   rZ   r   )r$   r   r   Zparams_listZneed_keep_fp32Zfp16_varname_list_in_fp32_oprF   paramsr   r   r   op_need_keep_fp32  s0   

r   c              	   C  s   |}| j D ]?}t s t| |r td| d| | d q|r?| |D ]}||}|r=|jt	j
kr=tjjj} nq'qtjjj}q|S )Nz---- Input rz   z should be kept fp32 ----)rW   r   r   rH   r   r   rZ   r[   r   rX   rY   r1   r2   FP32)r$   	amp_dtyper.   	dst_dtyperF   ri   rj   r   r   r   get_promote_dtype  s    


r   c                 C  s   |dkr|S |j }|}| j|jv rbd}	d}
| jD ]G}|r_| |D ]=}||}|jd u r.q!|j| u r?t|| |}|d u r>q!n|j}||v sL|j|jv rRt	j
jj}q!||v s\|j|jv r^|}q!q|S t	j
jj}|S )NO2F)r)   rE   Z	gray_listrW   rZ   r[   r$   r`   r   r   r1   r2   r   
white_list)r$   r   r   r.   r   r-   keep_fp16_opsr)   r   Z	keep_fp32Z	keep_fp16rF   ri   rj   rl   r   r   r   get_amp_dst_dtype  s:   




r   c              	   C  s   t  }| jD ]}t st| |rqt| | ||||dd}||}q| jD ]}t s4t	| |r4q(t| | 
||||dd q(|S )NF)r   T)r   rW   r   r   rH   r   rZ   r   ro   rK   rp   )r$   r.   r   r   r   rF   Zin_varsrJ   r   r   r   process_op_input_and_outputsA  s2   

r   c                 C  sJ   || | | j }| jD ]}|dsq|j|dj }t||| qd S )N	sub_block)r,   r)   ra   r(   r_   id	map_block)r.   fn	parent_opr,   r$   r   r   r   r   r   c  s   


r   r,   paddle.static.Programamp_records,dict[int, list[tuple[AmpOptions, int, int]]]global_amp_optionsc                   sB   i  fdd}t |  |  D ]	\}}|| qd S )Nc           	        sb   | j }| j}|D ]&}|}| v r* | D ]\}}}|j t||v r)|} nq||< qd S r'   )r*   r)   r^   rw   )	r.   r   Z	block_idxr)   r$   Zcurrent_op_amp_optionsZamp_optionsstartendr   r   Zop_amp_options_mapr   r   fill_amp_enable_op_mapt  s   
z6prepare_op_amp_options.<locals>.fill_amp_enable_op_map)r   r   itemsZset_amp_options)r,   r   r   r   r$   r   r   r   r   prepare_op_amp_optionsm  s   r   r   c                 C  sh  t d t |  |du rt|}t|}|dkr |jt |_|dkr7|dur0t|}t|}|j|j |_|  }t }t }	t }
t }t	| ||||d}|
|}dd }| jD ]}|j}|D ]}t d| d	 ||sxt d
 qc| }t||||\}}|

|}
|r|| t|||tjjj t d qc|j|jv r|	| t||||}|

|}
t d qc|jdkr||dd }||dd }|d|j |jt|d t d|j|dd |j|dd |j|d|d qc|}|st |||||||	}nt!|||}||kr?|	| t||||}|

|}
t d qc|| t|||tjjj t d qcq\| jD ]J}|j}d}|t"|k r|| }d}||	v rt#|||tjjj|}||7 }||v rt#||||tjjj}||7 }||d 7 }|t"|k sfqXt d t |  |
$| |
S )a  
    Traverse all ops in the whole model and set their inputs and outputs
    to the fp16 data type. This function will do some special process for
    the batch normalization, which keeps the computational process of
    batchnorms in FP32.
    Args:
        program (Program): The used program.
        amp_lists (AutoMixedPrecisionLists): An AutoMixedPrecisionLists object.
        use_fp16_guard(bool): Determine whether to use `fp16_guard` when
                              constructing the program. Default True.
        dest_type(core.VarDesc.VarType): the cast type. such as core.VarDesc.VarType.FP16 and core.VarDesc.VarType.BF16.
    z#---- before cast model to fp16 ----Nr   r   )r   r   r   r   c                   s   d}fdd t jdkrt fddjD rdS jdv r$|S jd	v r-d}|S d
D ]}dsD|rD|tv rDd}q/|S )NTc                   s>    j | sdS  j | jtjjjkrdS  j | jt	v S r|   )
r.   r[   r   rE   r   r1   r2   DENSE_TENSORr   SUPPORT_FLOAT_TYPES)r/   )r$   r   r   is_support_type  s   zAcast_model_to_fp16.<locals>.need_process.<locals>.is_support_typer   c                 3  s    | ]} | V  qd S r'   r   ).0r/   )r   r   r   	<genexpr>  s    

z;cast_model_to_fp16.<locals>.need_process.<locals>.<genexpr>F)	set_value)Zcreate_py_readerread)rS   r   rR   )r6   r+   rM   rE   ra   r_   rf   )r$   need_processrm   r   )r   r$   r   r     s&   



z(cast_model_to_fp16.<locals>.need_processz-- process op: z  --z/---- The op does not need to be processed ----.zE---- Add into keep_fp32_ops because the op needs to be kept fp32 ----z=---- Add into keep_fp16_ops because the op in white_list ----rP   r<   r   rQ   rR   rS   zq---- op type: {}, in var [name: {} dtype: {}], out var [name: {} dtype: {}], attr [in_dtype {} out_dtype {}] ----zG----  Add into keep_fp16_ops because it should be promoted to fp16 ----zG----  Add into keep_fp32_ops because it should be promoted to fp32 ----r
   z"---- after cast model to fp16 ----)%r   r   r   r   r   Zall_listr   r   r   r   r   r(   r)   r   r   r   r   r   r1   r2   r   rE   r[   rZ   rp   rg   r   r    r   rX   r_   formatr   r   r6   rn   difference_update)r,   r   r   	dest_typer   r   r   r   r-   r   to_fp16_var_namesr   Zfp32_var_namesr   r.   r)   r$   
all_paramsZop_keep_fp32Zfp16_var_names_in_fp32_opfp16_var_namesrj   rk   r   r*   rh   Zin_var_cast_numr   r   r   cast_model_to_fp16  s  



'









S




r   c                 C  s:   t   t|  t |}t |t j }t   |S r'   )	rX   disable_staticr   _set_expected_place	to_tensorrP   bfloat16numpyenable_static)place
fp32_arrayZfp32_tensorZ
bf16_arrayr   r   r   _convert_float_to_bfloat16Y  s   

r   c                 C  sP   t   t|  t |}t |t j }t r"|	 
dd t   |S )NZmaster_grad_castT)rX   r   r   r   r   rP   rY   r   r   Zget_defining_opZset_bool_attrr   )r   Z	org_arrayZ
org_tensorr   r   r   r   _convert_to_floatb  s   

r   c              	   C  s(  g }|j D ]	}||  qt|}	|r|nt }
|r|nt }|D ]n}|j|
v rtd|j d|	 d|   |	|jr|	|j
 }t|}|tjkr]t| |}|||  nt|}|||  |r|j|v r|	||j j}|
 }t| |}|||  q#td|j  q#dS )aQ  
    Traverse all parameters in the whole model and set them to the FP16 data type.
    Whereas, this function will keep parameters of batchnorms in FP32.
    Args:
        place(base.CPUPlace|base.CUDAPlace): `place` is used to restore the FP16 weight tensors.
        program (Program): The used program.
        scope(base.Scope, optional): `scope` is used to get the FP32 weight tensor values.
                                      Default is None.
        to_fp16_var_names(set|list, optional): The data types of vars in `to_fp16_var_names`
                                               will be set to FP16. Usually, it is the returned
                                               value of `cast_model_to_fp16` API.
        dest_type(core.VarDesc.VarType): the cast type. such as core.VarDesc.VarType.FP16 and core.VarDesc.VarType.BF16.
    z-- cast r   z, place is zCannot find N)r(   r   r   r   r   r   r/   r   r   Zfind_varZ
get_tensornparrayrX   r   r   r   r   warning)r   r,   scoper   r   Zrewrite_master_weightZmaster_weightsr   r.   Z	dtype_strr   Z	var_scoper   Zparam_tdataZp_arrayZmaster_p_varZ
master_p_tZmaster_p_arrayr   r   r   cast_parameters_to_fp16m  s<   







r   c              	   C  s  |   }|  tjjj}tjjj}|D ]\}}|j}|jt	j
kr|jdkr|d}|t|@ r<|dr<|d ntd| d||jd d }	t|j||	}
tj }|j|	g}|
|rl||
| |
|| |d| ||jd krqt|j||j}|rtd| d|d  |j }||j tj||d	d	d	d	d
}|j| t |j|j}|dkrtd| d|j!|dd q|  d	S )a  
    Update op_role_var attr for some ops to make sure the gradients
    transferred across GPUs is FP16.
    1. Check whether the op that outputs gradient is cast or not.
    2. If op is cast and gradient is FP32, remove the op_role_var
       and find the prev op which outputs FP16 gradient
    3. Update the op_role_var of the prev op.

    Args:
        main_prog (Program): The main program for training.
        params_grads (list): A list of params and grads.
    rP   rO   Zop_role_varzThe cast op z4 must be in BACKWARD role and have op_role_var attr.r   ru   zH's output should not beused by a non-optimize op, however, itis used by N)r.   r    rE   rT   rU   rV   zThe op z is not in programF)sync)"r   Z_sync_with_cppr   rc   rd   ZBackwardZOptimizer$   r   rX   rY   rE   r_   rb   ra   Z_remove_attrrr   rZ   rW   r`   r)   ZkOpRoleVarAttrNamer/   r   rg   ry   r    Z	append_opZ	copy_fromr   Operatorrq   r{   Z
_remove_op)Z	main_progZparams_gradsr.   ZBACKWARDZOPTIMIZEpgr$   ZroleZfp16_grad_nameZop_for_fp16_gradZop_role_var_attr_nameZattr_valZpost_opsZnew_op_descZnew_opZop_idxr   r   r   update_role_var_grad  s`   






r   )Fr'   )r,   r   r   r   r   r   )C
__future__r   loggingdataclassesr   r   r   rX   Zpaddle.baser   r   r   Zpaddle.base.frameworkr   Zpaddle.base.log_helperr   Zpaddle.base.wrapped_decoratorr	   Z
fp16_listsr   r   r   r   INFOr   r1   r2   r   ZSELECTED_ROWSZDENSE_TENSOR_ARRAYr\   r   r   ZDEFAULT_AMP_OPTIONSr&   r0   r5   r7   r:   rH   rK   rn   r`   ry   r{   r   r   r   r   r3   r4   ZFP64rf   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s   	
_
#
%,
"


 R	
7