o
    1 i#                     @   sT  d dl Z d dlZd dlmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZ er:d dlmZ eeZee dg d	Zd d i i i i i d
i f	ej_ededefddZe			
ddddeee  dededef
ddZe		ddddeee  dedee fddZe		
ddee dee dedefddZ dS )     N)TYPE_CHECKINGListOptional)DEFAULT_POLICY_ID)OldAPIStack)LEARNER_STATS_KEY)GradInfoDictLearnerStatsDict
ResultDict)EnvRunnerGroupRolloutMetrics)	episode_lengthepisode_rewardagent_rewardscustom_metrics
perf_stats	hist_datamediaepisode_faultyconnector_metricsF	grad_inforeturnc                 C   sJ   t | v r| t  S i }|  D ]\}}t|tu r"t |v r"|t  ||< q|S )aD  Return optimization stats reported from the policy.

    .. testcode::
        :skipif: True

        grad_info = worker.learn_on_batch(samples)

        # {"td_error": [...], "learner_stats": {"vf_loss": ..., ...}}

        print(get_stats(grad_info))

    .. testoutput::

        {"vf_loss": ..., "policy_loss": ...}
    )r   itemstypedict)r   Zmultiagent_statskv r   h/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/ray/rllib/evaluation/metrics.pyget_learner_stats$   s   r      workersr   remote_worker_idstimeout_secondskeep_custom_metricsc                 C   s    t | ||d}t|||d}|S )a  Gathers episode metrics from rollout worker set.

    Args:
        workers: EnvRunnerGroup.
        remote_worker_ids: Optional list of IDs of remote workers to collect
            metrics from.
        timeout_seconds: Timeout in seconds for collecting metrics from remote workers.
        keep_custom_metrics: Whether to keep custom metrics in the result dict as
            they are (True) or to aggregate them (False).

    Returns:
        A result dict of metrics.
    )r#   )r$   )collect_episodessummarize_episodes)r!   r"   r#   r$   episodesmetricsr   r   r   collect_metricsA   s   r)   c                 C   sH   | j dd d||d}t|dkrtd g }|D ]}|| q|S )a`  Gathers new episodes metrics tuples from the given RolloutWorkers.

    Args:
        workers: EnvRunnerGroup.
        remote_worker_ids: Optional list of IDs of remote workers to collect
            metrics from.
        timeout_seconds: Timeout in seconds for collecting metrics from remote workers.

    Returns:
        List of RolloutMetrics.
    c                 S   s   |   S )N)Zget_metrics)wr   r   r   <lambda>s   s    z"collect_episodes.<locals>.<lambda>T)Zlocal_env_runnerr"   r#   r   zWARNING: collected no metrics.)Zforeach_env_runnerlenloggerwarningextend)r!   r"   r#   Zmetric_listsr'   r(   r   r   r   r%   ^   s   
r%   r'   new_episodesc           "      C   s  |du r| }g }g }t t}t t}t t}t t}t t}	t t}
d}| D ]}|j D ]\}}|| | q3|jrG|d7 }q,||j ||j |j	 D ]\}}|| | qXt
|jdkpot|jv}|r|j D ]\\}}}|| | qw|j D ]\}}||  |7  < q|j D ]\}}|	| | qt|dr|j D ]}| D ]}| D ]\}}|
| | qqqq,|rt|}t|}t|}ntd}td}td}|rt|}ntd}||d< ||d< i }i }i }|  D ]!\}}t|||< t|||< t|||< ||d|< q	|  D ]G\}}d	d
 |D } |rE| ||< q1t| ||d < | rdt| ||d < t| ||d < ntd||d < td||d < ||= q1|  D ]\}}t|||< qt }!|
 D ]\}}t||!|< qtd i d|d|d|d|dt|	dt|d|d|d|dt|dt|dt|d|d|!dt
|d|d|d|dt
|S )!a  Summarizes a set of episode metrics tuples.

    Args:
        episodes: List of most recent n episodes. This may include historical ones
            (not newly collected in this iteration) in order to achieve the size of
            the smoothing window.
        new_episodes: All the episodes that were completed in this iteration.
        keep_custom_metrics: Whether to keep custom metrics in the result dict as
            they are (True) or to aggregate them (False).

    Returns:
        A result dict of metrics.
    Nr      r   nanr   episode_lengthszpolicy_{}_rewardc                 S   s    g | ]}t t |s|qS r   )npanyisnan).0r   r   r   r   
<listcomp>   s     z&summarize_episodes.<locals>.<listcomp>Z_mean_min_maxZepisode_reward_maxZepisode_reward_minZepisode_reward_meanZepisode_len_meanepisode_mediaZepisodes_timesteps_totalpolicy_reward_minpolicy_reward_maxpolicy_reward_meanr   
hist_statsZsampler_perfnum_faulty_episodesZnum_episodesZepisode_return_maxZepisode_return_minZepisode_return_meanZepisodes_this_iterr   )collectionsdefaultdictlistr   r   appendr   r   r   r   r,   r   r   r   r   hasattrr   valuesminmaxr4   meanfloatcopyformatr   sum)"r'   r0   r$   Zepisode_rewardsr3   Zpolicy_rewardsr   r   r?   r;   r   r@   Zepisoder   r   Zis_multi_agent_Z	policy_idZrewardZper_pipeline_metricsZper_connector_metricsZconnector_metric_namevalZ
min_rewardZ
max_rewardZ
avg_rewardZ
avg_lengthr<   r>   r=   ZrewardsZv_listZfiltZmean_connector_metricsr   r   r   r&      s   







	
r&   )Nr    F)Nr    )NF)!rA   loggingtypingr   r   r   numpyr4   Zray.rllib.policy.sample_batchr   Zray.rllib.utils.annotationsr   Z$ray.rllib.utils.metrics.learner_infor   Zray.rllib.utils.typingr   r	   r
   Zray.rllib.env.env_runner_groupr   	getLogger__name__r-   
namedtupler   __new____defaults__r   intboolr)   r%   r&   r   r   r   r   <module>   sv    


#