o
    1 i                     @   s   d Z ddlZddlZddlZddlmZmZ ddlm	Z	 ddl
mZ eeZe	dejfddZe	dd
ededefddZe	dd ZdS )z7Utils for minibatch SGD across multiple RLlib policies.    N)MultiAgentBatchSampleBatch)OldAPIStack)LearnerInfoBuilderarrayc                 C   s   | |    td|   S )zNormalize the values in an array.

    Args:
        array (np.ndarray): Array of values to normalize.

    Returns:
        array with zero mean and unit standard deviation.
    g-C6?)meanmaxZstd)r    r	   _/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/ray/rllib/utils/sgd.pystandardized   s   
r   Tsamplessgd_minibatch_sizeshufflec           
      c   s    |s| V  dS t | trtdd| vrd| vr|   | |}|\}}t|dkrC|r3t| |D ]\}}| || V  q5dS tt||}|rQt| |D ]\\}}\}}	| 	||||	V  qSdS )a  Return a generator yielding minibatches from a sample batch.

    Args:
        samples: SampleBatch to split up.
        sgd_minibatch_size: Size of minibatches to return.
        shuffle: Whether to shuffle the order of the generated minibatches.
            Note that in case of a non-recurrent policy, the incoming batch
            is globally shuffled first regardless of this setting, before
            the minibatches are generated from it!

    Yields:
        SampleBatch: Each of size `sgd_minibatch_size`.
    Nz;Minibatching not implemented for multi-agent in simple modeZ
state_in_0Zstate_out_0r   )

isinstancer   NotImplementedErrorr   Z_get_slice_indiceslenrandomlistzipslice)
r   r   r   Z
all_slicesZdata_slicesZstate_slicesijsiZsjr	   r	   r
   minibatches   s0   



r   c                 C   s   |   } tdd}| D ]X\}}|| jvrq| j| }	|D ]
}
t|	|
 |	|
< q| rC|jd d |krCtd||jd d t	|D ]}t
|	|D ]}|t||i|j| }||| qNqGq| }|S )a	  Execute minibatch SGD.

    Args:
        samples: Batch of samples to optimize.
        policies: Dictionary of policies to optimize.
        local_worker: Master rollout worker instance.
        num_sgd_iter: Number of epochs of optimization to take.
        sgd_minibatch_size: Size of minibatches to use for optimization.
        standardize_fields: List of sample field names that should be
            normalized prior to optimization.

    Returns:
        averaged info fetches over the last SGD epoch taken.
       )Znum_devicesmodelZmax_seq_lenzC`sgd_minibatch_size` ({}) cannot be smaller than`max_seq_len` ({}).)Zas_multi_agentr   itemsZpolicy_batchesr   Zis_recurrentconfig
ValueErrorformatranger   Zlearn_on_batchr   countZadd_learn_on_batch_resultsfinalize)r   ZpoliciesZlocal_workerZnum_sgd_iterr   Zstandardize_fieldsZlearner_info_builderZ	policy_idpolicybatchfieldr   Z	minibatchresultsZlearner_infor	   r	   r
   do_minibatch_sgdH   s:   


	r'   )T)__doc__loggingr   numpynpZray.rllib.policy.sample_batchr   r   Zray.rllib.utils.annotationsr   Z$ray.rllib.utils.metrics.learner_infor   	getLogger__name__loggerZndarrayr   intboolr   r'   r	   r	   r	   r
   <module>   s    
+