o
    1 i                     @   s   d dl Z d dlmZ d dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ d dlZd dlmZ d dlmZmZ d d	lmZmZ d d
lmZ eddeG dd deZG dd deZdededefddZeG dd dZdS )    N)	dataclass)OptionalSet)Coordinator)detect_nicsnics_to_env_var)secrettimeout)update_env_vars)WorkerWorkerGroup)BackendBackendConfig)	PublicAPIbeta)Z	stabilityc                   @   s   e Zd ZU dZdZeee  ed< dZ	e
ed< dZee ed< dZee
 ed< dZee ed< dZee ed	< d
Ze
ed< dZe
ed< edd Zdd Zedd ZdS )HorovodConfiga  Configurations for Horovod setup.

    See https://github.com/horovod/horovod/blob/master/horovod/runner/common/util/settings.py # noqa: E501

    Args:
        nics (Optional[Set[str]): Network interfaces that can be used for
            communication.
        verbose: Horovod logging verbosity.
        key (Optional[str]): Secret used for communication between workers.
        ssh_port (Optional[int]): Port for SSH server running on worker nodes.
        ssh_identity_file (Optional[str]): Path to the identity file to
            ssh into different hosts on the cluster.
        ssh_str (Optional[str]): CAUTION WHEN USING THIS. Private key
            file contents. Writes the private key to ssh_identity_file.
        timeout_s: Timeout parameter for Gloo rendezvous.
        placement_group_timeout_s: Timeout parameter for Ray
            Placement Group creation. Currently unused.
    Nnics   verbosekeyssh_portssh_identity_filessh_stri,  	timeout_sd   placement_group_timeout_sc                 C   s   t j| jddS )NzTimed out waiting for {activity}. Please check connectivity between servers. You may need to increase the --start-timeout parameter if you have too many servers.)message)r	   Timeoutr   self r    d/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/ray/train/horovod/config.pystart_timeout/   s   zHorovodConfig.start_timeoutc                 C   sv   | j r-tj| js-t| jd}t| jd || j  W d    n1 s(w   Y  | jd u r9t	
 | _d S d S )Nwi  )r   ospathexistsr   openchmodwriter   r   Zmake_secret_key)r   fr    r    r!   __post_init__9   s   
zHorovodConfig.__post_init__c                 C   s   t S N)_HorovodBackendr   r    r    r!   backend_clsB   s   zHorovodConfig.backend_cls)__name__
__module____qualname____doc__r   r   r   str__annotations__r   intr   r   r   r   r   r   propertyr"   r+   r.   r    r    r    r!   r      s   
 
		r   c                   @   s,   e Zd ZU dZeed< dedefddZdS )r-   Tshare_cuda_visible_devicesworker_groupbackend_configc              
      s\  g }t tD ]}j| jj}||t|t| qt	| t
|| _dd jD  dd jD }tt| D ]\}\}}| j||| qA| j }	g }|	 D ]\}}
||t|
 q[t	| | j } fddt D }fdd|D }t|t| jjksJ t|t| jj|d}|t| t| d S )Nc                 S      g | ]}|j jqS r    )metadatanode_id.0r#   r    r    r!   
<listcomp>e       z,_HorovodBackend.on_start.<locals>.<listcomp>c                 S   r:   r    )r;   hostnamer=   r    r    r!   r?   f   r@   c                    s   g | ]}  |qS r    )index)r>   r<   )node_idsr    r!   r?   y   s    c                    s   g | ]	}t  j| qS r    )_HorovodWorkerWrapperworkers)r>   Zworker_index)r8   r    r!   r?   z   s    )Zall_host_namesnode_workers)rangelenrE   r;   r<   appendZexecute_single_async_init_env_varsraygetr   Zcoordinator	enumeratezipregisterZfinalize_registrationitemsr
   Zestablish_rendezvousset	hostnamesr   listupdater   execute)r   r8   r9   Zsetup_futuresZrankZworker_node_idrR   rA   r<   Zall_infoZlocal_cross_env_varZcoordinator_envsZnode_worker_indexesrF   r   r    )rC   r8   r!   on_startJ   sP   
	





z_HorovodBackend.on_startN)	r/   r0   r1   r7   boolr4   r   r   rV   r    r    r    r!   r-   G   s   
 r-   
world_rank
world_sizer<   c                 C   s*   |t jd< t| t jd< t|t jd< dS )z)Initialize Horovod environment variables.ZHOROVOD_HOSTNAMEZHOROVOD_RANKZHOROVOD_SIZEN)r$   environr3   )rX   rY   r<   r    r    r!   rJ      s   
rJ   c                   @   s"   e Zd ZU eed< edd ZdS )rD   r#   c                    s   | j  G  fddd}| S )Nc                       s   e Zd Z fddZdS )z4_HorovodWorkerWrapper.execute.<locals>.ExecuteHandlec                    s"   d } j jj||g|R i |S r,   )ZactorZ_RayTrainWorker__executeremote)r   funcargskwargs_r#   r    r!   r[      s   z;_HorovodWorkerWrapper.execute.<locals>.ExecuteHandle.remoteN)r/   r0   r1   r[   r    r`   r    r!   ExecuteHandle   s    ra   r`   )r   ra   r    r`   r!   rU      s   z_HorovodWorkerWrapper.executeN)r/   r0   r1   r   r4   r6   rU   r    r    r    r!   rD      s   
 rD   )r$   dataclassesr   typingr   r   Zhorovod.ray.runnerr   Zhorovod.ray.utilsr   r   Zhorovod.runner.common.utilr   r	   rK   Zray.train._internal.utilsr
   Z ray.train._internal.worker_groupr   r   Zray.train.backendr   r   Zray.utilr   r   r-   r5   r3   rJ   rD   r    r    r    r!   <module>   s$    5C