o
    1 i                     @   s   d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlmZ e eZed	d
eG dd deZdededefddZdd ZG dd deZdS )    N)	dataclass)ray_constants)get_address_and_port)WorkerGroup)BackendBackendConfig)*DEFAULT_JAX_DISTRIBUTED_SHUTDOWN_TIMEOUT_S"JAX_DISTRIBUTED_SHUTDOWN_TIMEOUT_S)	PublicAPIalpha)Z	stabilityc                   @   s&   e Zd ZU dZeed< edd ZdS )	JaxConfigFuse_tpuc                 C   s   t S )N)_JaxBackend)self r   c/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/ray/train/v2/jax/config.pybackend_cls   s   zJaxConfig.backend_clsN)__name__
__module____qualname__r   bool__annotations__propertyr   r   r   r   r   r      s   
 r   master_addr_with_portnum_workersindexc                 C   s@   ddl }tjdd }d|dv r|j| || dS dS )zeSet up distributed Jax training information.

    This function should be called on each worker.
    r   NZJAX_PLATFORMS Ztpu,)jaxosenvirongetlowersplitdistributedZ
initialize)r   r   r   r   Zjax_platformsr   r   r   _setup_jax_tpu_environment   s
   r%   c               
   C   sP   zddl } | j  W dS  ty' } ztd|  W Y d}~dS d}~ww )zShutdown JAX distributed environment.

    This function should be called on each worker during cleanup.
    If JAX distributed was not initialized, this is a no-op.
    r   N'Error during JAX distributed shutdown: )r   r$   shutdown	Exceptionloggerwarning)r   er   r   r   _shutdown_jax_distributed,   s   r,   c                   @   s0   e Zd ZdedefddZdedefddZdS )r   worker_groupbackend_configc              
   C   sj   |j sd S |dt\}}| d| }g }tt|D ]}||j|t|t||d qt	| d S )Nr   :)r   r   r   )
r   Zexecute_singler   rangelenappendZexecute_single_asyncr%   rayr!   )r   r-   r.   Zmaster_addrZmaster_portr   Zsetup_futuresir   r   r   on_start;   s    	z_JaxBackend.on_startc              
   C   s   |j sdS |t}ttt}ztj||d t	
d W dS  tjjy4   t	d| d Y dS  tyN } zt	d|  W Y d}~dS d}~ww )zBCleanup JAX distributed resources when shutting down worker group.N)timeoutz"JAX distributed shutdown completedz)JAX distributed shutdown timed out after z= seconds. This may indicate workers are hung or unresponsive.r&   )r   Zexecute_asyncr,   r   Zenv_integerr	   r   r3   r!   r)   debug
exceptionsZGetTimeoutErrorr*   r(   )r   r-   r.   Zshutdown_futuresZ	timeout_sr+   r   r   r   on_shutdownP   s$   


z_JaxBackend.on_shutdownN)r   r   r   r   r   r5   r9   r   r   r   r   r   :   s    r   )loggingr   dataclassesr   r3   Zray._privater   Zray.train._internal.utilsr   Z ray.train._internal.worker_groupr   Zray.train.backendr   r   Zray.train.constantsr   r	   Zray.utilr
   	getLoggerr   r)   r   strintr%   r,   r   r   r   r   r   <module>   s.    

