o
    )i                  
   @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
 d dlZdd ZdddZ		dd
edede
e	eef  fddZdd ZdefddZedkrje jddZejdedd e Zeej dS dS )    N)quote)DictListc                    s   | st d d S t| d  }dd |D  | D ]|D ]}t | tt|  |< qqd fdd|D }t | t dt|  | D ]d fd	d|D }t | qJd S )
Nz
Empty listr   c                 S   s   i | ]
}|t t|d qS )
   )maxlen.0header r   j/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/xformers/profiler/find_slowest.py
<dictcomp>   s    z+print_json_as_dataframe.<locals>.<dictcomp>z  c                 3   s"    | ]}|d  |  V  qdS <Nr   r   )
col_widthsr   r   	<genexpr>    s     z*print_json_as_dataframe.<locals>.<genexpr>-c                 3   s*    | ]}t | d  |  V  qdS r   )strr   r   rowr   r   r   &   s    
)printlistkeysr   r   r   join)Z	json_listheadersr
   
header_rowZdata_rowr   r   r   print_json_as_dataframe   s$    
r      c                 C   sT   t | jg ddddg dg jdgdd }dd	 |d | D S )
N)namelog_nameduritemsr   r   r    F)Z	ascendingc                 S   (   g | ]\}}||j d  dddqS )  .2f ms)r   Zstd_devr    r	   idxr   r   r   r   
<listcomp>7       zAcompute_std_dev_of_event_durations_over_ranks.<locals>.<listcomp>)r   filtergroupbysumZstdsort_valuesiterrows)eventstopgrouped_sorted_eventsr   r   r   -compute_std_dev_of_event_durations_over_ranks,   s   


r4      top_klast_kreturnc                 C   sP   t | jddgddg dg }dd |d | || d   D S )Nr   r    r!   c                 S   r#   )r$   r%   r&   )r   Znccl_msr'   r(   r   r   r   r*   H   r+   z$sort_nccl_events.<locals>.<listcomp>)r   r,   r-   r.   r/   r0   )Znccl_eventsr6   r7   r3   r   r   r   sort_nccl_events=   s   r9   c                 C   s   d}|  drdt|  dt| }ndt| dt|  }tj|dtjtjd}z#ztj|jd	d
gd}W n t	yD   |
   w W | dksNJ n	| dksWJ w tj| |d< ||jjd }||jjd  }||fS )Nz@.traceEvents[] | select(.cat == "kernel") | [.name, .dur] | @csvz.gzz
gunzip -c z | jq --raw-output zjq --raw-output  T)shellstdoutstderrr   r    )namesr   r   Znccl)endswithr   
subprocessPopenPIPEDEVNULLpdZread_csvr<   	Exception	terminatewaitospathbasenamer   r   
startswith)Zprofile_trace_pathZjq_pipecmdZsubpZkernel_eventscommunication_kernelscomputation_kernelsr   r   r   parse_one_fileP   s(   
$rO   cuda_profile_dirc           
      C   sJ  |  d}t  |}t|dkr|  d}t  |}t|dkr'td|  g }g }tjjdd2}t|t|D ]!\}\}}t	d|d  d	t| d
dd |
| |
| q;W d    n1 sgw   Y  t|}t|}t	  t	d tt| t	d t|}	t	d t|	 t	d t|}	t	d t|	 d S )Nz/*trace.json.gzr   z/*.jsonz=Couldnt find any profiling trace in the specified directory:     )max_workerszProcessed file    /T)endflushz/The longest and shortest communication_kernels:z

z>The standard deviation of nccl kernels durations across ranks:zEThe standard deviation of computation kernels durations across ranks:)globr   rE   
concurrentfuturesThreadPoolExecutor	enumeratemaprO   r   appendrD   concatr   r9   r4   )
rP   Zcuda_profile_path_nameZprofile_filesrM   rN   executorindexZcomm_ksZcomp_ksZstd_dfr   r   r   print_profiling_infol   sF   






	
rb   __main__zProcess CUDA profile directory.)descriptionzThe CUDA profile directory)typehelp)r   )r5   r5   )argparseconcurrent.futuresrY   rX   rH   r@   shlexr   typingr   r   ZpandasrD   r   r4   intr   r9   rO   rb   __name__ArgumentParserparseradd_argument
parse_argsargsrP   r   r   r   r   <module>   s4   

+