o
    + iL1                     @   sx   d dl Z d dlZd dlZd dlmZ eeejddZG dd dZdd Z	d	d
 Z
dd Zdd Zdd ZdddZdS )    N)
get_loggerz&%(asctime)s-%(levelname)s: %(message)s)fmtc                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
OperatorStatsUnitc                 C   s"   d | _ d| _d| _d| _d| _d S )Nr   )op_type
fp32_calls
fp16_calls
bf16_callsother_callsself r   g/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/static/amp/debugging.py__init__   s
   
zOperatorStatsUnit.__init__c                 C   sv   |d u r| j d | _ d S |tjkr| jd | _d S |tjkr&| jd | _d S |tjkr3| jd | _d S | j d | _ d S )N   )r	   paddleZfloat32r   Zfloat16r   Zbfloat16r   )r   dtyper   r   r   update"   s   


zOperatorStatsUnit.updatec                 C   sD   |  j |j 7  _ |  j|j7  _|  j|j7  _|  j|j7  _d S N)r   r   r   r	   )r   Zanotherr   r   r   addto/   s   zOperatorStatsUnit.addtoc                 C   s   | j | j| j| jgS r   )r   r   r   r	   r
   r   r   r   convert_to_list5   s
   z!OperatorStatsUnit.convert_to_listN)__name__
__module____qualname__r   r   r   r   r   r   r   r   r      s
    r   c                 C   s@   | t jjjjjt jjjjjt jjjjjt jjjjjfv rdS dS )NTF)	r   basecoreZVarDescZVarTypeZFP64ZFP32ZFP16ZBF16r   r   r   r   _is_floating_point>   s   r   c                 C   s~   |r| |n||}t|tsJ t|dkrd S |d }z	| |}|jW S    td	|j
|r6dnd|| Y d S )Nr   z*Operator < {} > gets {} < {} : {} > error!inputoutput)r   r   
isinstancelistlenZ_var_recursiver   _loggerwarningformattype)blockopZarg_nameZis_inputZ	var_namesvar_namevarr   r   r   _get_var_dtype_from_blockJ   s   
r*   c              
   C   s   d }d }| j D ]:}t|| |d}|d u rq|d u r|}q||krAt|r;t|r;td| j d| j  d| j d qt|rA|}q| jD ]3}t|| |d}|d u rSqE|d u rZ|}qE||krxt|rxt|rxtd| j d| j  d| j d qE|S )NTzOperator < z1 > has different input data types, input_names = z, output_names = .Fz: > has different input / output data types, input_names = )input_namesr*   r   r"   r#   r%   Zoutput_names)r'   r&   r(   compute_dtypeZin_nameZ	var_dtypeZout_namer   r   r   _extract_compute_dtype]   sB   



r.   c                 C   sR   i }| D ]"}|  D ]\}}||d d u rt|||< q
|| | q
q|S r   )itemsgetcopyr   )op_stats_listZmerged_op_stats_dictZeach_op_stats_dictr   unitr   r   r   _merge_op_stats   s   r4   c                 C   s   dd }g }| j D ]S}i }|jD ]F}||jd d u r$t }|||j< n||j }|jdv r1d }n||jr>t||dd}nd|jv rKt||dd}nt||}|j|d q|	| q	|S )Nc                 S   s,   g d}| |v r
dS |  dd|v rdS dS )N)castZ
batch_normZinstance_normZ
layer_normTZ_grad F)replace)r   Zspecial_op_listr   r   r   _is_special_ops_with_input_x   s   z8_get_op_stats_list.<locals>._is_special_ops_with_input_x)Zcreate_py_readerreadZcreate_double_buffer_readerXTParamr   )
blocksopsr0   r%   r   r*   r,   r.   r   append)programr8   r2   r&   Zblock_op_stats_dictr'   r3   r-   r   r   r   _get_op_stats_list   s*   	






r@   Fc                 C   s   dd }| du rt j } t| }t|}|r>t|dkr>tt|D ]}tddt	| d  t j
j|||  q#tdd t j
j|| dS )	a
  
    Collect the number of operators for different data types through parsing
    the program. The statistical data are categorized according to four data
    types, namely float32, float16, bfloat16 and others.

    Args:
        program(Program, optional): The program to parse. Default None, and the default main_program will be parsed.
        print_subblocks(bool, optional): Whether to print the operator stats for each subblock. Default False.

    Examples:

        .. code-block:: python

            >>> import paddle
            >>> paddle.enable_static()

            >>> class SimpleConvNet(paddle.nn.Layer):
            ...     def __init__(self):
            ...         super().__init__()
            ...         self.conv = paddle.nn.Conv2D(in_channels=1, out_channels=6, kernel_size=3)
            ...         self.linear = paddle.nn.Linear(in_features=26, out_features=10)
            ...
            ...     def forward(self, x):
            ...         out = self.conv(x)
            ...         out = paddle.nn.functional.relu(out)
            ...         out = self.linear(out)
            ...         out = paddle.nn.functional.softmax(out)
            ...         return out

            >>> main_program = paddle.static.Program()
            >>> startup_program = paddle.static.Program()
            >>> with paddle.utils.unique_name.guard():
            ...     with paddle.static.program_guard(main_program, startup_program):
            ...         model = SimpleConvNet()
            ...         x = paddle.static.data(
            ...             name='input', shape=[None, 1, 28, 28], dtype='float32'
            ...         )
            ...         out = model(x)
            ...         loss = paddle.mean(out)
            ...         optimizer = paddle.optimizer.AdamW()
            ...         optimizer = paddle.static.amp.decorate(optimizer)
            ...         optimizer.minimize(loss)
            >>> paddle.static.amp.debugging.collect_operator_stats(main_program)
            <------------------------------------------------ op list of all blocks ------------------------------------------------->
            <------------------------------------------------------- op list -------------------------------------------------------->
            <--------------- Op Name ---------------- | -- FP16 Calls --- | -- BF16 Calls --- | --- FP32 Calls--- | -- Other Calls -->
            adamw                                   |  0                |  0                |  4                |  0
            cast                                    |  5                |  0                |  6                |  0
            check_finite_and_unscale                |  0                |  0                |  1                |  0
            conv2d                                  |  1                |  0                |  0                |  0
            conv2d_grad                             |  1                |  0                |  0                |  0
            elementwise_add                         |  2                |  0                |  0                |  0
            elementwise_add_grad                    |  2                |  0                |  0                |  0
            elementwise_mul                         |  0                |  0                |  1                |  0
            elementwise_mul_grad                    |  0                |  0                |  1                |  0
            fill_constant                           |  0                |  0                |  1                |  0
            matmul_v2                               |  1                |  0                |  0                |  0
            matmul_v2_grad                          |  1                |  0                |  0                |  0
            memcpy                                  |  0                |  0                |  0                |  1
            reduce_mean                             |  0                |  0                |  1                |  0
            reduce_mean_grad                        |  0                |  0                |  1                |  0
            relu                                    |  1                |  0                |  0                |  0
            relu_grad                               |  1                |  0                |  0                |  0
            reshape2                                |  0                |  0                |  1                |  0
            reshape2_grad                           |  0                |  0                |  1                |  0
            softmax                                 |  0                |  0                |  1                |  0
            softmax_grad                            |  0                |  0                |  1                |  0
            update_loss_scaling                     |  0                |  0                |  1                |  0
            <----------------------------------------------------- op count: 22 ----------------------------------------------------->
    c                 S   s"   |   D ]
\}}| | |< q| S r   )r/   r   )Zop_stats_unit_dictkeyvaluer   r   r   _convert_to_list  s   z0collect_operator_stats.<locals>._convert_to_listNr   z
<{:-^120}>z op list of block  z op list of all blocks )r   ZstaticZdefault_main_programr@   r4   r!   rangeprintr$   stramp	debuggingZ_print_operator_stats)r?   Zprint_subblocksrC   r2   Zmerged_op_statsir   r   r   collect_operator_stats   s   H

rK   )NF)r1   loggingr   Zpaddle.base.log_helperr   r   INFOr"   r   r   r*   r.   r4   r@   rK   r   r   r   r   <module>   s   $-)