o
    * i/                     @   sB   d dl Z d dlZd dlZd dlZd dlZdd ZG dd dZdS )    Nc                 C   s:   t j| t jt jddd}| \}}|rt| |}|S )NT)stdoutstderruniversal_newlinesshell)
subprocessPopenPIPEcommunicatewarningswarn)cmderr_msgdefault_valueprocessr   r    r   t/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/distributed/launch/utils/topology.pycall_cmd   s   
r   c                   @   s\   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd ZdS )SingleNodeTopologyc                 C   s8   d| _ td| _d| _d| _i | _g | _g | _d | _d S )N        inf         )	pcie_latencyfloatpcie_bandwidthnvlink_bandwidth
nb_devicesmachinedeviceslinksjson_objectselfr   r   r   __init__(   s   

zSingleNodeTopology.__init__c                 C   s  d}d}d}t |||}d}d}d}t |||}d}d}d	}t |||}d
}d}d}	d}d}d}t |||}
|
r<d}d
}	d}d}d}t |||}
|
rNd}d}	d}d}d}t |||}
|
r`d}d}	t|t| t| | }|| }||	 }|| jd< || jd< d S )Nz,lscpu | grep 'Socket(s)' | awk '{print $NF}'zFailed to get number of sockets   z5lscpu | grep 'Core(s) per socket' | awk '{print $NF}'z(Failed to get number of cores per socket   zElscpu | grep GHz | awk -F '@' '{print $NF}' | awk -F 'G' '{print $1}'zFailed to get cpu clock rateg333333@   r   zlscpu | grep ssezFailed to get cpu vector sizeZssezlscpu | grep avx2Zavx2r   zlscpu | grep avx512Zavx512   	sp_gflops	dp_gflops)r   intr   r   )r"   r   r   r   Z
nb_socketsZnb_cores_per_socketZ
clock_rateZnb_fmasZsimd_width_spZsimd_width_dpZvector_sizeZgflops_per_elementr(   r)   r   r   r   calculate_cpu_flops3   s`   
z&SingleNodeTopology.calculate_cpu_flopsc                 C   sL   |dkrdS |dkrdS |dkrdS |dkrdS |d	krd
S |dkr$dS d S )N   g      ?r&   g      ?   g      ?r$   g       @   g      @   g       @r   )r"   pcie_generationr   r   r   pcie_gen2bandwidthu   s   z%SingleNodeTopology.pcie_gen2bandwidthc                 C   s\   d|v r
d|v r
dS d|v rd|v rdS d|v rdS d|v r dS d	|v r&d
S d|v r,dS d S )NZH100ZSXM5)i`  i0u  ZPCIe)i  i]  ZA100)i,L  i%  ZA800ZV100)iT=  ix  ZP100)ih)  i  r   )r"   modelr   r   r   model2gflops   s   zSingleNodeTopology.model2gflopsc           
      C   s   d| }d| }dt | d t | d }d}d}t|||}| j}	d|v rH| jdkr;d	}d
}d}tt|||| _t|dd  | j }	d}||	fS )Nr&   z cat /tmp/matrix.txt | awk 'FNR==z	 {print $z}'Failed to get topo matrixZNVLZNVr   z8nvidia-smi nvlink -s -i 0 | tail -n 1 | awk '{print $3}'zFailed to get nvlink bandwidthZ25)strr   r   r   r   r*   )
r"   Z	source_idZ	target_idZrow_idZ	column_idr   r   r   	link_typelink_bandwidthr   r   r   get_link_bandwidth   s6   

z%SingleNodeTopology.get_link_bandwidthc                 C   s   d}d}d}t ||| }d}d}d}t ||| }d}d}d	}tt |||d
 }|   || jd< || jd< || jd< d S )Nzhostname -szFailed to get hostname	localhostzhostname -izFailed to get host ip addressz	127.0.0.1zUcat /proc/meminfo | grep 'MemAvailable' | awk -F ':' '{print $NF}' | awk '{print $1}'zFailed to get cpu memoryZ41366484g    .Ahostnameaddrmemory)r   stripr*   r+   r   )r"   r   r   r   r:   ip_addrZ
cpu_memoryr   r   r   get_host_info   s    

z SingleNodeTopology.get_host_infoc                 C   s\  d}d}d}t t|||| _t td}|| jk r|| _t| jD ]6}dt| d }d}d}t t|||}dt| d	 }d
}d}t t|||}t| j| 	|| | _q#g }g }	g }
g }g }g }g }t
j
j }t| jD ]V}|||  |	| |
d dt| d }d}d}|t|||  dt| d }d}d}|t t|||d  | || \}}|| || qstt|D ]4}i }|| |d< |	| |d< |
| |d< || |d< || |d< || |d< || |d< | j| q| j| jd< | j| jd< |
d | jd< |
d  d|d  | jd < | j| jd!< d S )"Nznvidia-smi -L | wc -lzFailed to get device count8ZPADDLE_LOCAL_SIZEznvidia-smi --id=z4 --query-gpu=pcie.link.gen.max --format=csv,noheaderz&Failed to get max pcie link generation4z6 --query-gpu=pcie.link.width.max --format=csv,noheaderz!Failed to get max pcie link widthZ16ZGPUz' --query-gpu=name --format=csv,noheaderzFailed to get device namezNVIDIA A100-SXM4-40GBzA --query-gpu=memory.free --format=csv,noheader | awk '{print $1}'z%Failed to get device available memoryZ40536g     @@Z	global_idZlocal_idtyper2   r<   r(   r)   latency	bandwidthr   Zdevice_type-Zdevice_type_fullr   )r*   r   r   osgetenvranger5   minr   r1   paddledistributedget_rankappendr=   r3   lenr   r   r   )r"   r   r   r   Z
local_sizeir0   Z
pcie_widthZdev_global_idsZdev_local_idsZ	dev_typesZ
dev_modelsZdev_memoriesZdev_sp_gflopsZdev_dp_gflops
rank_firstr(   r)   Zdevicer   r   r   get_device_info   s   



z"SingleNodeTopology.get_device_infoc                 C   s  g }g }g }g }g }d}d}d}t ||| tjj }	t| jD ]\}
t| jD ]T}|
|kr9|d |d q(||
|	  |||	  |d |
|krf|| j |
 }|||  |||  q(|
|k r|| |
|\}}|| || q(q!tt| jD ]}
|	|
| j |
  |	|
| j |
  qd}d}d}tt
|D ](}
i }||
 |d	< ||
 |d
< ||
 |d< ||
 |d< ||
 |d< | j| q| j| jd< d S )Nz$nvidia-smi topo -m > /tmp/matrix.txtr4    Xr   r   zrm /tmp/matrix.txtzFailed to delete matrix.txtZsource_global_idZtarget_global_idrB   rC   rD   r   )r   rJ   rK   rL   rH   r   rM   r8   reversedpoprN   r   r   )r"   Zlink_source_global_idsZlink_target_global_idsZ
link_typesZlink_latenciesZlink_bandwidthsr   r   r   rP   rO   jindexr6   r7   linkr   r   r   get_link_info&  sZ   



z SingleNodeTopology.get_link_infoc                 C   s.   |    |   |   tj| jdd| _d S )Nr$   indent)r?   rQ   rY   jsondumpsr   r    r!   r   r   r   detect\  s   zSingleNodeTopology.detectc                 C   sB   t |d}tj| j|dd W d    d S 1 sw   Y  d S )Nwr$   rZ   )openr\   dumpr   )r"   Zoutput_pathoutfiler   r   r   ra   h  s   "zSingleNodeTopology.dumpN)__name__
__module____qualname__r#   r+   r1   r3   r8   r?   rQ   rY   r^   ra   r   r   r   r   r   '   s    B [6r   )r\   rF   r   r
   rJ   r   r   r   r   r   r   <module>   s   