o
    1 i%                     @   s  d dl Z d dlZd dlmZmZ d dlmZmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* e	rd dl+m,Z, d dl-m.Z. d dl/m0Z0 e& \Z1Z2Z3e 4e5Z6edg dZ7eee
  Z8G dd deZ9e#G dd deedZ:e#G dd de:Z;dS )    N)ABCMetaabstractmethod)defaultdict
namedtuple)TYPE_CHECKINGAnyListOptionalTypeUnion)DEPRECATED_VALUEdeprecation_warning)BaseEnvconvert_to_base_env)SampleCollector)SimpleListCollector)EnvRunnerV2
_PerfStats)RolloutMetrics)InputReader)concat_samples)OldAPIStackoverride)try_import_tf)SampleBatchType)log_once)RLlibCallback)ObservationFunction)RolloutWorker_PolicyEvalData)env_idZagent_idZobsinfoZ	rnn_stateZprev_actionZprev_rewardc                   @   s   e Zd Zdd ZdS )_NewEpisodeDefaultDictc                 C   s(   | j d u r	t||  | }| |< |S N)default_factoryKeyError)selfr    ret r(   h/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/ray/rllib/evaluation/sampler.py__missing__-   s   
z"_NewEpisodeDefaultDict.__missing__N)__name__
__module____qualname__r*   r(   r(   r(   r)   r"   ,   s    r"   c                   @   sd   e Zd ZdZeedefddZedefddZ	ede
e fddZede
e fd	d
ZdS )SamplerInputz1Reads input experiences from an existing sampler.returnc                 C   s4   |   g}||   t|dkrtdt|S )Nr   zNo data available from sampler.)get_dataextendget_extra_batcheslenRuntimeErrorr   )r&   Zbatchesr(   r(   r)   next9   s
   
zSamplerInput.nextc                 C      t )zCalled by `self.next()` to return the next batch of data.

        Override this in child classes.

        Returns:
            The next batch of data.
        NotImplementedErrorr&   r(   r(   r)   r0   A   s   	zSamplerInput.get_datac                 C   r6   )a$  Returns list of episode metrics since the last call to this method.

        The list will contain one RolloutMetrics object per completed episode.

        Returns:
            List of RolloutMetrics objects, one per completed episode since
            the last call to this method.
        r7   r9   r(   r(   r)   get_metricsL   s   
zSamplerInput.get_metricsc                 C   r6   )a  Returns list of extra batches since the last call to this method.

        The list will contain all SampleBatches or
        MultiAgentBatches that the user has provided thus-far. Users can
        add these "extra batches" to an episode by calling the episode's
        `add_extra_batch([SampleBatchType])` method. This can be done from
        inside an overridden `Policy.compute_actions_from_input_dict(...,
        episodes)` or from a custom callback's `on_episode_[start|step|end]()`
        methods.

        Returns:
            List of SamplesBatches or MultiAgentBatches provided thus-far by
            the user since the last call to this method.
        r7   r9   r(   r(   r)   r2   X   s   zSamplerInput.get_extra_batchesN)r+   r,   r-   __doc__r   r   r   r5   r   r0   r   r   r:   r2   r(   r(   r(   r)   r.   5   s    
r.   )	metaclassc                   @   s   e Zd ZdZddddddddddddeeedddd	ed
eeef de	de
dddedededed deee  defddZeedefddZeedee fddZeedee fddZdS )SyncSamplerzHSync SamplerInput that collects experiences when `get_data()` is called.Z	env_stepsFTN)count_steps_bymultiple_episodes_in_batchnormalize_actionsclip_actionsobservation_fnsample_collector_classrenderpoliciespolicy_mapping_fnpreprocessorsobs_filterstf_sesshorizonsoft_horizonno_done_at_endworkerr   envclip_rewardsrollout_fragment_lengthr>   	callbacksr   r?   r@   rA   rB   r   rC   rD   c             
   C   s(  t drO|durtdd |durtdd |durtdd |dur(tdd |dur1tdd |tkr;td	d
d |tkrEtdd
d |tkrOtdd
d t|| _|| _t | _t	|j
jd| _|sht}||j|||||d| _|| _t|| j||| j||| jd| _| j | _t | _dS )aN  Initializes a SyncSampler instance.

        Args:
            worker: The RolloutWorker that will use this Sampler for sampling.
            env: Any Env object. Will be converted into an RLlib BaseEnv.
            clip_rewards: True for +/-1.0 clipping,
                actual float value for +/- value clipping. False for no
                clipping.
            rollout_fragment_length: The length of a fragment to collect
                before building a SampleBatch from the data and resetting
                the SampleBatchBuilder object.
            count_steps_by: One of "env_steps" (default) or "agent_steps".
                Use "agent_steps", if you want rollout lengths to be counted
                by individual agent steps. In a multi-agent env,
                a single env_step contains one or more agent_steps, depending
                on how many agents are present at any given time in the
                ongoing episode.
            callbacks: The RLlibCallback object to use when episode
                events happen during rollout.
            multiple_episodes_in_batch: Whether to pack multiple
                episodes into each batch. This guarantees batches will be
                exactly `rollout_fragment_length` in size.
            normalize_actions: Whether to normalize actions to the
                action space's bounds.
            clip_actions: Whether to clip actions according to the
                given action_space's bounds.
            observation_fn: Optional multi-agent observation func to use for
                preprocessing observations.
            sample_collector_class: An optional SampleCollector sub-class to
                use to collect, store, and retrieve environment-, model-,
                and sampler data.
            render: Whether to try to render the environment after each step.
        Zdeprecated_sync_sampler_argsNrE   )oldrF   rG   rH   rI   rJ   T)rR   errorrK   rL   )Zema_coef)r>   )rM   base_envr?   rQ   
perf_statsrP   r>   rD   )r   r   r   r   rT   rP   queueQueueextra_batchesr   configZsampler_perf_stats_ema_coefrU   r   Z
policy_mapZsample_collectorrD   r   Z_env_runner_objrun_env_runnermetrics_queue)r&   rM   rN   rO   rP   r>   rQ   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   r(   r(   r)   __init__o   s\   <







zSyncSampler.__init__r/   c                 C   s*   	 t | j}t|tr| j| n|S qr#   )r5   r[   
isinstancer   r\   put)r&   itemr(   r(   r)   r0      s   

zSyncSampler.get_datac                 C   sB   g }	 z| | j j| j d W n tjy   Y |S w q)NT)rU   )appendr\   
get_nowait_replacerU   getrV   Empty)r&   	completedr(   r(   r)   r:      s   
zSyncSampler.get_metricsc                 C   s4   g }	 z
| | j  W n tjy   Y |S w qr#   )ra   rX   rb   rV   re   )r&   extrar(   r(   r)   r2      s   zSyncSampler.get_extra_batches)r+   r,   r-   r;   r   r   r   boolfloatintstrr	   r
   r   r]   r   r.   r   r0   r   r   r:   r2   r(   r(   r(   r)   r=   k   s`    	
	


or=   )<loggingrV   abcr   r   collectionsr   r   typingr   r   r   r	   r
   r   Zray._common.deprecationr   r   Zray.rllib.env.base_envr   r   Z0ray.rllib.evaluation.collectors.sample_collectorr   Z5ray.rllib.evaluation.collectors.simple_list_collectorr   Z"ray.rllib.evaluation.env_runner_v2r   r   Zray.rllib.evaluation.metricsr   Zray.rllib.offliner   Zray.rllib.policy.sample_batchr   Zray.rllib.utils.annotationsr   r   Zray.rllib.utils.frameworkr   Zray.rllib.utils.typingr   Zray.util.debugr   Zray.rllib.callbacks.callbacksr   Z)ray.rllib.evaluation.observation_functionr   Z#ray.rllib.evaluation.rollout_workerr   Ztf1tf_	getLoggerr+   loggerr   Z
StateBatchr"   r.   r=   r(   r(   r(   r)   <module>   sB     	
	5