o
    1 i                     @   s  d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlmZmZmZ d dlmZ d dlmZmZ d dlmZmZmZmZ d dlmZmZmZ d d	lmZ d d
l m!Z! d dl"m#Z#m$Z$m%Z% d dl&m'Z' e
dZ(e
dZ)e *e+Z,e'G dd dZ-dS )    N)Path)AnyCallableDictListOptionalTypeVarUnion)StartTracebackStartTracebackWithWorkerRankskip_exceptions)Dataset)
Checkpoint
DataConfig)BackendExecutorInactiveWorkerGroupErrorTrainBackendErrorTrainingWorkerError)_TrainingResult_TrainSessionget_session)ActorWrapper)BackendConfig)BaseTrainer
GenDatasetTrainingFailedError)DeveloperAPITSc                   @   s   e Zd ZdZdeeef dedeeg e	f ee
eef ge	f f de
eef de
eef dedeee
eeef  fd	d
Zdd Z	ddee fddZdefddZdd Zdeee
  fddZdd ZdefddZdS )TrainingIteratorzEAn iterator over Train results. Returned by ``trainer.run_iterator``.backend_executorbackend_config
train_funcdatasetsmetadatadata_config
checkpointc                 C   sL   || _ | | _|| _|| _|| _|| _| j|| j| j| j|d d| _d S )N)r"   r#   r$   r%   r&   F)	_backend_executorZbackend_clsZ_backend_train_func	_datasets	_metadata_data_config_start_training_finished_training)selfr    r!   r"   r#   r$   r%   r&    r/   ]/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/ray/train/trainer.py__init__'   s   


zTrainingIterator.__init__c                 C   s   | S Nr/   r.   r/   r/   r0   __iter__B   s   zTrainingIterator.__iter__Nc              	      s:   t  }|s	J d|j fdd d S )Nz8`_start_training` should only be called from within Tunec                      s   j j dS )N)r"   r#   r$   r%   storager&   )r'   Zstart_trainingr/   r&   r%   r#   r$   r.   r5   r"   r/   r0   <lambda>R   s    z2TrainingIterator._start_training.<locals>.<lambda>)r   r5   _run_with_error_handling)r.   r"   r#   r$   r%   r&   Ztune_sessionr/   r6   r0   r,   E   s   z TrainingIterator._start_trainingfuncc              	   C   st   z| W S  t y#   td | | j| j| j| j | | Y S  t	y.   t
dd  ty9   t
dd w )NzSWorkers have been successfully restarted. Resuming training from latest checkpoint.zThis Trainer is not active. It is either shutdown already or never started in the first place. Either create a new Trainer or start this one.zTraining failed. You should not be seeing this error and this is a bug. Please create a new issue at https://github.com/ray-project/ray.)r   loggerinfor,   r(   r)   r*   r+   r8   r   RuntimeErrorr   )r.   r9   r/   r/   r0   r8   \   s4   z)TrainingIterator._run_with_error_handlingc                 C   s   |   r| jjdd tz| | j}|d u r*| jjdd | | j d| _t|W S  tys } z;t	|t
r=|j}nd }t|}dtt|||j}| jjd||d z| jjdd d| _W   tyn   Y  w d }~ww )NF)erroredT )r=   stack_tracefailed_rank)Zgraceful_termination)is_finishedr'   Zreport_final_run_statusStopIterationr8   _fetch_next_result_finish_trainingr-   r
   
isinstancer   Zworker_rankr   join	tracebackformat_exceptiontype__traceback__shutdown	Exception)r.   Znext_resultser@   r?   r/   r/   r0   __next__|   s@   
zTrainingIterator.__next__returnc                 C   s0   | j  }|du rdS tdd |D sJ |S )a  Fetch next results produced by ``session.report()`` from each worker.

        Assumes ``start_training`` has already been called.

        Returns:
            A list of dictionaries of values passed to ``session.report()`` from
                each worker. Each item corresponds to an intermediate result
                a single worker. If there are no more items to fetch,
                returns None.
        Nc                 s   s    | ]}t |tV  qd S r2   )rE   r   ).0resultr/   r/   r0   	<genexpr>   s    z6TrainingIterator._fetch_next_result.<locals>.<genexpr>)r'   Zget_next_resultsall)r.   resultsr/   r/   r0   rC      s
   
z#TrainingIterator._fetch_next_resultc                 C   s
   | j  S )at  Finish training and return final results. Propagate any exceptions.

        Blocks until training is finished on all workers.

        Assumes `start_training` has already been called.

        Returns:
            A list of return values from calling ``train_func`` on each worker.
                Each item corresponds to the return value from a single worker.
        )r'   Zfinish_trainingr3   r/   r/   r0   rD      s   
z!TrainingIterator._finish_trainingc                 C   s   | j S r2   )r-   r3   r/   r/   r0   rA      s   zTrainingIterator.is_finishedr2   )__name__
__module____qualname____doc__r	   r   r   r   r   r   r   strr   r   r   r   r   r   r1   r4   r,   r8   rN   r   rC   rD   boolrA   r/   r/   r/   r0   r   #   s6    
$


	
 'r   ).loggingrG   pathlibr   typingr   r   r   r   r   r   r	   Zray.air._internal.utilr
   r   r   Zray.datar   Z	ray.trainr   r   Z$ray.train._internal.backend_executorr   r   r   r   Zray.train._internal.sessionr   r   r   Zray.train._internal.utilsr   Zray.train.backendr   Zray.train.base_trainerr   r   r   Zray.util.annotationsr   r   r   	getLoggerrU   r:   r   r/   r/   r/   r0   <module>   s$    $
