o
    )i?6                     @   s6  U d dl Z d dlmZ d dlmZmZmZ d dlmZm	Z	m
Z
mZmZ d dlZd dlmZmZmZ d dlmZmZmZ d dlmZ d dlmZmZ d d	lmZmZmZm Z m!Z!m"Z" eG d
d dZ#eG dd dZ$eG dd dZ%ee%e$f Z&ee'd< eG dd dZ(eG dd deZ)G dd deZ*dS )    N)defaultdict)asdict	dataclassfield)AnyCallableOptional	TypeAliasUnion)
DeviceType_KinetoEvent_ProfilerResult)
_EventType_ExperimentalConfig_ProfilerEvent)FunctionEvent)ProfilerActivityprofile)TablePrinterevent_has_moduleevent_is_torch_opevent_module_reprevent_torch_op_stack_traceindent_stringc                   @   sl   e Zd ZU eed< dZed  ed< eedZ	ed  ed< dZ
eed< edd	 Zed
d Zedd ZdS )_ModuleTreeNodeeventNparent)default_factorychildren tracec                 C   s   | j jd u pt| j jdkS Nr   )r   r   lenself r%   k/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/profiler/layerwise_profile.pyis_leaf   s   z_ModuleTreeNode.is_leafc                 C   s
   t | jS N)r   r   r#   r%   r%   r&   is_torch_op   s   
z_ModuleTreeNode.is_torch_opc                 C   s"   | j jtjko| j jd jtjkS )N   )r   tagr   Kinetotypeddevice_typer   CUDAr#   r%   r%   r&   is_cuda#   s   z_ModuleTreeNode.is_cuda)__name__
__module____qualname__r   __annotations__r   r   r   listr   r    strpropertyr'   r)   r0   r%   r%   r%   r&   r      s   
 

r   c                   @   s.   e Zd ZU eed< eed< eed< eed< dS )SummaryStatsEntrynamecuda_time_uspct_cuda_timeinvocationsN)r1   r2   r3   r6   r4   floatintr%   r%   r%   r&   r8   )   s
   
 r8   c                   @   s6   e Zd ZU eed< eed< eed< eed< eed< dS )ModelStatsEntryr9   cpu_time_usr:   r;   r    N)r1   r2   r3   r6   r4   r=   r%   r%   r%   r&   r?   1   s   
 r?   
StatsEntryc                   @   s.   e Zd ZU eed< ee ed< ee ed< dS )_StatsTreeNodeentryr   r   N)r1   r2   r3   rA   r4   r5   r   r%   r%   r%   r&   rB   =   s   
 rB   c                	   @   s  e Zd ZU eed< eddZeee	e
 f ed< eddZeee	e f ed< eddZe	e ed< eddZe	e ed< eddZe	e ed< d	Zee ed
< dd Zd1deeef fddZd1deeef fddZdefddZdefddZdeeef fddZe	d2de	eeef  dee egef ef fddZ!dd  Z"d!d" Z#d#efd$d%Z$d#efd&d'Z%d(d) Z&d*d+ Z'd,e	e de	eeef  fd-d.Z(d,e	e de	e fd/d0Z)d	S )3LayerwiseProfileResults_kineto_resultsF)init_kineto_event_correlation_map_event_correlation_map_module_tree_model_stats_tree_summary_stats_treeNnum_running_seqsc                 C   s   |    |   |   d S r(   )_build_correlation_map_build_module_tree_build_stats_treesr#   r%   r%   r&   __post_init__Q   s   z%LayerwiseProfileResults.__post_init__column_widthsc                 C   s^   t dddddd}|r|jd	i | dd | | jD }tt|| j|dd d d S )
N<      r9   r@   r:   r;   r    c                 S   s,   g | ]\}}|j d ks|jd kr||fqS r   )r:   r@   .0depthrowr%   r%   r&   
<listcomp>^   s
    z=LayerwiseProfileResults.print_model_table.<locals>.<listcomp>c                 S      dd|   d S N|- r%   indentr%   r%   r&   <lambda>f       z;LayerwiseProfileResults.print_model_table.<locals>.<lambda>indent_styler%   )dictupdate_flatten_stats_treerJ   r   r?   print_table _indent_row_names_based_on_depth)r$   rQ   _column_widthsZfiltered_model_tabler%   r%   r&   print_model_tableV   s"   

z)LayerwiseProfileResults.print_model_tablec                 C   s\   t ddddd}|r|jd
i | dd | | jD }tt|| j|dd d	 d S )NP   rS      r9   r:   r;   r<   c                 S   s"   g | ]\}}|j d kr||fqS rU   r:   rV   r%   r%   r&   rZ   o   s
    
z?LayerwiseProfileResults.print_summary_table.<locals>.<listcomp>c                 S   r[   r\   r%   r`   r%   r%   r&   rb   v   rc   z=LayerwiseProfileResults.print_summary_table.<locals>.<lambda>rd   r%   )rf   rg   rh   rK   r   r8   ri   rj   )r$   rQ   rk   Zfiltered_summary_tabler%   r%   r&   print_summary_tableh   s$   
z+LayerwiseProfileResults.print_summary_tablefilenamec                 C   *   t dd | | jD }|| d S )Nc                 S      g | ]\}}t |qS r%   r   rW   _rY   r%   r%   r&   rZ   y       zHLayerwiseProfileResults.export_model_stats_table_csv.<locals>.<listcomp>)pd	DataFramerh   rJ   to_csvr$   rr   dfr%   r%   r&   export_model_stats_table_csvx      

z4LayerwiseProfileResults.export_model_stats_table_csvc                 C   rs   )Nc                 S   rt   r%   ru   rv   r%   r%   r&   rZ      rx   zJLayerwiseProfileResults.export_summary_stats_table_csv.<locals>.<listcomp>)ry   rz   rh   rK   r{   r|   r%   r%   r&   export_summary_stats_table_csv   r   z6LayerwiseProfileResults.export_summary_stats_table_csvreturnc                 C   s"   d| j i| | j| | jdS )NrL   )metadataZsummary_statsZmodel_stats)rL   _convert_stats_tree_to_dictrK   rJ   r#   r%   r%   r&   convert_stats_to_dict   s
   

z-LayerwiseProfileResults.convert_stats_to_dictr_   depths_rowsre   c                 C   sF   g }| D ]\}}|j dkrqt|}t|j|||_|| q|S r!   )r:   copydeepcopyr   r9   append)r   re   Zindented_rowsrX   rY   Zindented_rowr%   r%   r&   rj      s   

z8LayerwiseProfileResults._indent_row_names_based_on_depthc                 C   s2   t t| _| j D ]}| j|  | q
d S r(   )r   r5   rG   rE   eventscorrelation_idr   )r$   r   r%   r%   r&   rM      s   
z.LayerwiseProfileResults._build_correlation_mapc                    sF   g _ j }	 ddtdtt f fdd |D ]} | qd S )Nr   	curr_nodec                    s   | j dkrd S t| r"t| |d}|r|j| nj| |}| jd u p-t| jdk}|rG|rGt| |t| dd dd}|j| |}| jD ]} || qJd S )Nr*   )r   r   r   c                 S   s   t | S r(   )r   )xr%   r%   r&   rb      s    zSLayerwiseProfileResults._build_module_tree.<locals>._df_traversal.<locals>.<lambda>)Zuntil)r   r   r    )Z	start_tidr   r   r   r   rI   r"   r   )r   r   noder'   child_df_traversalr$   r%   r&   r      s,   

zALayerwiseProfileResults._build_module_tree.<locals>._df_traversalr(   )rI   rE   Zexperimental_event_treer   r   r   )r$   Z
event_treerootr%   r   r&   rN      s   

z*LayerwiseProfileResults._build_module_treer   c                    s@    j jtjkr	d S | j j jg } fdd|D }t|d S )Nc                 3   s2    | ]}|  tjkr|  jjkr|V  qd S r(   )r.   r   r/   r9   r   )rW   r   r   r%   r&   	<genexpr>   s    z@LayerwiseProfileResults._get_kineto_gpu_event.<locals>.<genexpr>)r   r+   r   r,   rG   getr   next)r$   r   Zcorrelated_kineto_eventsiteratorr%   r   r&   _get_kineto_gpu_event   s   
z-LayerwiseProfileResults._get_kineto_gpu_eventc                    s   dt f fdd  |S )z Return cuda time in microsecondsr   c                    s@   | j r|  }r| d S d}| jD ]}| |7 }q|S )N     @@r   )r'   r   duration_nsr   )r   gpu_kineto_eventZcumulative_cuda_timer   _cumulative_cuda_time_recursiver$   r%   r&   r      s   
zVLayerwiseProfileResults._cumulative_cuda_time.<locals>._cumulative_cuda_time_recursive)r   )r$   r   r%   r   r&   _cumulative_cuda_time   s   z-LayerwiseProfileResults._cumulative_cuda_timec                    s   t  fdd jD S )Nc                    s   g | ]}  |qS r%   )r   )rW   r   r#   r%   r&   rZ      s    z<LayerwiseProfileResults._total_cuda_time.<locals>.<listcomp>)sumrI   r#   r%   r#   r&   _total_cuda_time   s   z(LayerwiseProfileResults._total_cuda_timec                    s   i   fdd	 	ddtdtt dtt ffddg _jD ]
}j| q)	 ddtdtt f fd	d
 g _	jD ]
}j	 | qKd S )Nc                    s   |   d S )Nd   r%   rp   )total_cuda_timer%   r&   r;      s   zALayerwiseProfileResults._build_stats_trees.<locals>.pct_cuda_timer%   r   r   summary_tracec           	         s   t | jrt| j}| }n|  }r"| }| d }nd S ||f }|v rG| j}| j|7  _| j	d7  _	|j|_
ntt|||ddg |d}|r^|j| ||< | jD ]
} || | qe| S )Nr   r*   ro   )rC   r   r   )r   r   r   r   r   r9   r   rC   r:   r<   r;   rB   r8   r   r   )	r   r   r   r9   r:   r   rC   new_noder   )build_summary_stats_tree_dfr;   r$   summary_dictr%   r&   r      s<   





zOLayerwiseProfileResults._build_stats_trees.<locals>.build_summary_stats_tree_dfc           	         s   t | jrt| j}| }| jjd }d}n|  }r/| }| d }d}| j}nd S t	t
|||||d|g d}|rI|j| | jD ]} || qL|S )Ni  r   r   r   rT   )rC   r   r   )r   r   r   r   Zduration_time_nsr   r9   r   r    rB   r?   r   r   )	r   r   r9   r:   r@   r    r   r   r   )build_model_stats_tree_dfr;   r$   r%   r&   r     s4   



zMLayerwiseProfileResults._build_stats_trees.<locals>.build_model_stats_tree_df)Nr%   r(   )
r   r   r   rB   tupler6   rK   rI   r   rJ   )r$   r   r%   )r   r   r;   r$   r   r   r&   rO      s.   &

z*LayerwiseProfileResults._build_stats_treestreec                    s0   g ddt f fdd |D ]} | qS )Nr   r   c                    s0    || jf | jD ]
} ||d d qd S )Nr*   )rX   )r   rC   r   )r   rX   r   df_traversalentriesr%   r&   r   @  s   
zALayerwiseProfileResults._flatten_stats_tree.<locals>.df_traversalrU   )rB   )r$   r   r   r%   r   r&   rh   <  s
   
z+LayerwiseProfileResults._flatten_stats_treec                    s6   g }dt dtt f fdd |D ]} || q|S )Nr   curr_json_listc                    s8   | t| jg d | jD ]} ||d d  qd S )N)rC   r   r   )r   r   rC   r   )r   r   r   r   r%   r&   r   N  s   
zILayerwiseProfileResults._convert_stats_tree_to_dict.<locals>.df_traversal)rB   r5   rf   )r$   r   Z
root_dictsr   r%   r   r&   r   J  s
   z3LayerwiseProfileResults._convert_stats_tree_to_dictr(   )r_   )*r1   r2   r3   r   r4   r   rG   rf   r>   r5   r   rH   r   rI   r   rJ   rB   rK   rL   r   rP   r6   rl   rq   r~   r   r   r   staticmethodr   rA   r
   r   rj   rM   rN   r   r   r   rO   rh   r   r%   r%   r%   r&   rD   D   sX   
 

#
T
rD   c                       s@   e Zd Zd	dee f fddZ fddZ fddZ  ZS )
layerwise_profileNrL   c                    s.   t  jtjtjgdddtddd || _dS )z
        layerwise profile constructor.

        Args:
            num_running_seqs (Optional[int], optional): When given,
            num_running_seqs will be passed to LayerProfileResults for metadata
            update. Defaults to None.
        T)verbose)Z
activitiesZrecord_shapesZ
with_stackZwith_modulesZexperimental_configN)super__init__r   ZCPUr/   r   rL   )r$   rL   	__class__r%   r&   r   ^  s   	

zlayerwise_profile.__init__c                    s
   t   S r(   )r   	__enter__r#   r   r%   r&   r   p  s   
zlayerwise_profile.__enter__c                    s(   t  ||| t| jj| jd| _d S )N)rL   )r   __exit__rD   ZprofilerZkineto_resultsrL   results)r$   exc_typeexc_valexc_tbr   r%   r&   r   s  s
   zlayerwise_profile.__exit__r(   )	r1   r2   r3   r   r>   r   r   r   __classcell__r%   r%   r   r&   r   \  s    r   )+r   collectionsr   dataclassesr   r   r   typingr   r   r   r	   r
   Zpandasry   Ztorch._C._autogradr   r   r   Ztorch._C._profilerr   r   r   Ztorch.autograd.profilerr   Ztorch.profilerr   r   Zvllm.profiler.utilsr   r   r   r   r   r   r   r8   r?   rA   r4   rB   rD   r   r%   r%   r%   r&   <module>   s0   
   