o
    1 i8N                     @   s  d dl mZ d dlmZ d dlmZmZ d dlZd dl	Z	d dl
mZmZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZmZmZ e \ZZZe \ZZd
ZdZdZ dZ!eeddddKde"de"dej#fddZ$eeddd	dLdeej# dee% dej#fddZ&edMdede%defddZ'e		dNdej#d ej#d!eej# d"ee( dej#f
d#d$Z)e			dOd%ed&ee d'e%d(e%def
d)d*Z*ed+d, Z+edPdej#d.e,dej#fd/d0Z-edej#dej#fd1d2Z.e				-dQd ej#d!eej# d3eej# de%d4e,f
d5d6Z/ed d-d7ej0fdeee"f d8e"d9e,d:e,d;e1dej#fd<d=Z2eee" fd>d?Z3edRdej#d@e,dej#fdAdBZ4edSdej#dCe%dej#fdDdEZ5e	dTdeej#e6f dGe"dHee, dej#fdIdJZ7dS )U    )OrderedDict)MappingProxyType)ListOptionalN)DiscreteMultiDiscrete)
Deprecated)	PublicAPI)try_import_tftry_import_torch)SpaceStructTensorStructType
TensorTypeUniongư>i    z)RLlib itself has no use for this anymore.F)helperror@   sizealignreturnc                 C   s   | |j  }tj||d  tjd}|jj| }|dkrdn|| }|dkr5|||d  dd |}n||||  |}t|| ksLJ t||jj| dksZJ |jj|S )a  Returns an array of a given size that is 64-byte aligned.

    The returned array can be efficiently copied into GPU memory by TensorFlow.

    Args:
        size: The size (total number of items) of the array. For example,
            array([[0.0, 1.0], [2.0, 3.0]]) would have size=4.
        dtype: The numpy dtype of the array.
        align: The alignment to use.

    Returns:
        A np.ndarray with the given specifications.
       dtyper   )itemsizenpemptyuint8ctypesdataviewlen)r   r   r   nr   Z
data_alignoffsetoutput r&   a/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/ray/rllib/utils/numpy.pyaligned_array   s   
 r(   items
time_majorc                 C   sv  t | dkrg S t | dkr| d S t| d tjr| d jtjtjtjfv r| d j}tt	dd | D |}|durx|du r\t	dd | D }| d j
d |f| d j
dd  }n1t	d	d | D }|| d j
d f| d j
dd  }nt	d
d | D }|f| d j
dd  }||}|jjd dksJ |jjtj| ||rdndd |S tj| |rddS ddS )a  Concatenate arrays, ensuring the output is 64-byte aligned.

    We only align float arrays; other arrays are concatenated as normal.

    This should be used instead of np.concatenate() to improve performance
    when the output array is likely to be fed into TensorFlow.

    Args:
        items: The list of items to concatenate and align.
        time_major: Whether the data in items is time-major, in which
            case, we will concatenate along axis=1.

    Returns:
        The concat'd and aligned array.
    r   r   c                 s   s    | ]}|j V  qd S N)r   .0sr&   r&   r'   	<genexpr>a   s    z!concat_aligned.<locals>.<genexpr>NTc                 s       | ]}|j d  V  qdS )r   Nshaper,   r&   r&   r'   r/   d       r   c                 s   r0   r   Nr1   r,   r&   r&   r'   r/   i   r3   c                 s   r0   r4   r1   r,   r&   r&   r'   r/   n   r3   r   )outaxisr6   )r"   
isinstancer   ndarrayr   float32Zfloat64r   r(   sumr2   reshaper   r    concatenate)r)   r*   r   ZflatZ	batch_dimZ	new_shaper%   r&   r&   r'   concat_aligned=   s@   

r>   Txreduce_typec                    s    fdd}t || S )a  Converts values in `stats` to non-Tensor numpy or python types.

    Args:
        x: Any (possibly nested) struct, the values in which will be
            converted and returned as a new struct with all torch/tf tensors
            being converted to numpy types.
        reduce_type: Whether to automatically reduce all float64 and int64 data
            into float32 and int32 data, respectively.

    Returns:
        A new struct with the same structure as `x`, but with all
        values converted to numpy arrays (on CPU).
    c                    s   t rt| t jrt|  dkr|   n|    }nt	r:t| t	jt	j
fr:t| dr:t	 s5J |  }n| } rat|tjrat|jtjrT|tj}|S t|jtra|tj}|S )Nr   numpy)torchr8   Tensorr"   r   cpuitemdetachrA   tfVariablehasattrexecuting_eagerlyr   r9   Z
issubdtyper   Zfloatingastyper:   intint32)rE   retr@   r&   r'   mapping   s*   
z!convert_to_numpy.<locals>.mapping)treeZmap_structure)r?   r@   rP   r&   rO   r'   convert_to_numpyx   s   rR   weightsbiases	frameworkc                 C   st   d
dd}|| } |dko | j d |j d ko | j d |j d k}|||d}||}t| ||du r7d	 S | S )a  Calculates FC (dense) layer outputs given weights/biases and input.

    Args:
        x: The input to the dense layer.
        weights: The weights matrix.
        biases: The biases vector. All 0s if None.
        framework: An optional framework hint (to figure out,
            e.g. whether to transpose torch weight matrices).

    Returns:
        The dense layer's output.
    Fc                 S   sR   t rt| t jr|    } tr t r t| tjr |  } |r't	
| } | S r+   )rB   r8   rC   rD   rF   rA   rG   rJ   rH   r   	transpose)r    rV   r&   r&   r'   map_   s   
zfc.<locals>.map_rB   r   r   )rV   N        F)r2   r   matmul)r?   rS   rT   rU   rW   rV   r&   r&   r'   fc   s   
&r[   inputsspaces_struct	time_axis
batch_axisc                    s  |r|sJ t | }|durt |ndgt| }d}d}g }t||D ]\ }	|du r<|r< jd }|r< jd }t|	tr[|rLt || g |	t
 |	jdtj q&t|	tr|rlt || dg |r|	tj fddt|	jD dd q&|	tj fd	dt|	jD dd q&t trt g |rt || dg n|rt |dg nt dg |	 tj q&tj|dd}
|rt|
||dg}
|
S )
a	  Flattens arbitrary input structs according to the given spaces struct.

    Returns a single 1D tensor resulting from the different input
    components' values.

    Thereby:
    - Boxes (any shape) get flattened to (B, [T]?, -1). Note that image boxes
    are not treated differently from other types of Boxes and get
    flattened as well.
    - Discrete (int) values are one-hot'd, e.g. a batch of [1, 0, 3] (B=3 with
    Discrete(4) space) results in [[0, 1, 0, 0], [1, 0, 0, 0], [0, 0, 0, 1]].
    - MultiDiscrete values are multi-one-hot'd, e.g. a batch of
    [[0, 2], [1, 4]] (B=2 with MultiDiscrete([2, 5]) space) results in
    [[1, 0,  0, 0, 1, 0, 0], [0, 1,  0, 0, 0, 0, 1]].

    Args:
        inputs: The inputs to be flattened.
        spaces_struct: The (possibly nested) structure of the spaces that `inputs`
            belongs to.
        time_axis: Whether all inputs have a time-axis (after the batch axis).
            If True, will keep not only the batch axis (0th), but the time axis
            (1st) as-is and flatten everything from the 2nd axis up.
        batch_axis: Whether all inputs have a batch axis.
            If True, will keep that batch axis as-is and flatten everything from the
            other dims up.

    Returns:
        A single 1D tensor resulting from concatenating all
        flattened/one-hot'd input components. Depending on the time_axis flag,
        the shape is (B, n) or (B, T, n).

    .. testcode::
        :skipif: True

        # B=2
        from ray.rllib.utils.tf_utils import flatten_inputs_to_1d_tensor
        from gymnasium.spaces import Discrete, Box
        out = flatten_inputs_to_1d_tensor(
            {"a": [1, 0], "b": [[[0.0], [0.1]], [1.0], [1.1]]},
            spaces_struct=dict(a=Discrete(2), b=Box(shape=(2, 1)))
        )
        print(out)

        # B=2; T=2
        out = flatten_inputs_to_1d_tensor(
            ([[1, 0], [0, 1]],
             [[[0.0, 0.1], [1.0, 1.1]], [[2.0, 2.1], [3.0, 3.1]]]),
            spaces_struct=tuple([Discrete(2), Box(shape=(2, ))]),
            time_axis=True
        )
        print(out)

    .. testoutput::

        [[0.0, 1.0,  0.0, 0.1], [1.0, 0.0,  1.0, 1.1]]  # B=2 n=4
        [[[0.0, 1.0, 0.0, 0.1], [1.0, 0.0, 1.0, 1.1]],
        [[1.0, 0.0, 2.0, 2.1], [0.0, 1.0, 3.0, 3.1]]]  # B=2 T=2 n=4
    Nr   r   depthc                    s0   g | ]\}}t  d d |f |dtjqS )Nr`   one_hotrK   r   r:   r-   ir#   Zinput_r&   r'   
<listcomp>-  s    z/flatten_inputs_to_1d_tensor.<locals>.<listcomp>r7   c                    s(   g | ]\}}t  | |d tjqS )r`   rc   re   rg   r&   r'   rh   7  s    )rQ   flattenr"   zipr2   r8   r   r   r<   appendrd   r#   rK   r:   r   r=   	enumerateZnvecfloatarray)r\   r]   r^   r_   Zflat_inputsZflat_spacesBTr5   spacemergedr&   rg   r'   flatten_inputs_to_1d_tensor   sd   B









rs   c                 C   sH   t | tjr| jdd | S t | trtt| S t | tr"t| S | S )a{  Flags actions immutable to notify users when trying to change them.

    Can also be used with any tree-like structure containing either
    dictionaries, numpy arrays or already immutable objects per se.
    Note, however that `tree.map_structure()` will in general not
    include the shallow object containing all others and therefore
    immutability will hold only for all objects contained in it.
    Use `tree.traverse(fun, action, top_down=False)` to include
    also the containing object.

    Args:
        obj: The object to be made immutable.

    Returns:
        The immutable object.

    .. testcode::
        :skipif: True

        import tree
        import numpy as np
        from ray.rllib.utils.numpy import make_action_immutable
        arr = np.arange(1,10)
        d = dict(a = 1, b = (arr, arr))
        tree.traverse(make_action_immutable, d, top_down=False)
    F)write)r8   r   r9   Zsetflagsr   r   dict)objr&   r&   r'   make_action_immutableS  s   

rw         ?deltac                 C   s6   t t | |k t | dd |t | d|   S )z4Reference: https://en.wikipedia.org/wiki/Huber_loss.       @g      ?)r   whereabspower)r?   ry   r&   r&   r'   
huber_lossz  s   .r~   c                 C   s   t t | d S )zComputes half the L2 norm of a tensor (w/o the sqrt): sum(x**2) / 2.

    Args:
        x: The input tensor.

    Returns:
        The l2-loss output according to the above formula given `x`.
    rz   )r   r;   Zsquare)r?   r&   r&   r'   l2_loss  s   
r   initial_internal_statesforget_biasc                 C   s  | j |rdnd }| j |rdnd }|j d d }|du r.tj||fd}	tj||fd}
n|d }	|d }
|rBtj|||fd}n	tj|||fd}t|D ]}|r^| |ddddf n
| dd|ddf }tj||
fdd}t||| }t|dd|d |d f | }t|	|}	t|ddd|f }t|dd||d f }t	|	t||}	t|dd|d |d f }t|t|	}
|r|
||ddddf< qO|
|dd|ddf< qO||	|
ffS )	a  Calculates LSTM layer output given weights/biases, states, and input.

    Args:
        x: The inputs to the LSTM layer including time-rank
            (0th if time-major, else 1st) and the batch-rank
            (1st if time-major, else 0th).
        weights: The weights matrix.
        biases: The biases vector. All 0s if None.
        initial_internal_states: The initial internal
            states to pass into the layer. All 0s if None.
        time_major: Whether to use time-major or not. Default: False.
        forget_bias: Gets added to first sigmoid (forget gate) output.
            Default: 1.0.

    Returns:
        Tuple consisting of 1) The LSTM layer's output and
        2) Tuple: Last (c-state, h-state).
    r   r      Nr1   r7   r      )
r2   r   Zzerosranger=   rZ   sigmoidmultiplytanhadd)r?   rS   rT   r   r*   r   Zsequence_lengthZ
batch_sizeZunitsZc_statesZh_statesZunrolled_outputstZinput_matrixZinput_matmul_matrixZ	sigmoid_1Z	sigmoid_2Ztanh_3Z	sigmoid_4r&   r&   r'   lstm  s4   0$ r   rX   ra   on_value	off_valuer   c                 C   sD  t | trtj| tjd} ntrt | tjr|  } | jtj	kr(| 
tj} d}|dkr3t| d }t| |k sEJ dt| || j}tjg ||R d| }g }t| jD ]5}dg| j }	dg| j }
d|
|< t|| |
}|dkr||d  |	|d < t||	}|| q\||  ||t|< |
|S )a  One-hot utility function for numpy.

    Thanks to qianyizhang:
    https://gist.github.com/qianyizhang/07ee1c15cad08afb03f5de69349efc30.

    Args:
        x: The input to be one-hot encoded.
        depth: The max. number to be one-hot encoded (size of last rank).
        on_value: The value to use for on. Default: 1.0.
        off_value: The value to use for off. Default: 0.0.

    Returns:
        The one-hot encoded equivalent of the input array.
    r   r   r   r   z<ERROR: The max. index of `x` ({}) is larger than depth ({})!r1   rb   )r8   rL   r   rn   rM   rB   rC   rA   r   Zbool_rK   int_maxformatr2   Zonesr   ndimZaranger<   Ztilerk   tuple)r?   ra   r   r   r   r2   r5   indicesrf   Ztilesr.   rr&   r&   r'   rd     s:   



rd   c                    s@   t rtt jr j tj fddt|D ddS )Nc                    sD   g | ]\}}t t d kr| ndd|f |dtjqS )r   Nr`   )rd   r"   rK   r   r:   re   r2   r?   r&   r'   rh     s    2z)one_hot_multidiscrete.<locals>.<listcomp>rb   r7   )rB   r8   rC   rA   r2   r   r=   rl   )r?   Zdepthsr&   r   r'   one_hot_multidiscrete  s   r   alphac                 C   s   t | | | | S )zImplementation of the leaky ReLU function.

    y = x * alpha if x < 0 else x

    Args:
        x: The input values.
        alpha: A scaling ("leak") factor to use for negative x.

    Returns:
        The leaky ReLU output for x.
    )r   maximum)r?   r   r&   r&   r'   relu"  s   r   
derivativec                 C   s$   |r| d|   S ddt |    S )aY  
    Returns the sigmoid function applied to x.
    Alternatively, can return the derivative or the sigmoid function.

    Args:
        x: The input to the sigmoid function.
        derivative: Whether to return the derivative or not.
            Default: False.

    Returns:
        The sigmoid function (or its derivative) applied to x.
    r   )r   exp)r?   r   r&   r&   r'   r   2  s   r   rb   r6   epsilonc                 C   s.   |pt }t| }t|tj||dd |S )a{  Returns the softmax values for x.

    The exact formula used is:
    S(xi) = e^xi / SUMj(e^xj), where j goes over all elements in x.

    Args:
        x: The input to the softmax function.
        axis: The axis along which to softmax.
        epsilon: Optional epsilon as a minimum value. If None, use
            `SMALL_NUMBER`.

    Returns:
        The softmax over x.
    T)Zkeepdims)SMALL_NUMBERr   r   r   r;   )r?   r6   r   Zx_expr&   r&   r'   softmaxF  s   
r   )r   r+   )T)NN)NFT)rx   )NNFrx   )rX   rY   )rb   N)8collectionsr   typesr   typingr   r   rA   r   rQ   Zgymnasium.spacesr   r   Zray._common.deprecationr   Zray.rllib.utils.annotationsr	   Zray.rllib.utils.frameworkr
   r   Zray.rllib.utils.typingr   r   r   r   Ztf1rG   ZtfvrB   _r   ZLARGE_INTEGERZMIN_LOG_NN_OUTPUTZMAX_LOG_NN_OUTPUTrL   r9   r(   boolr>   rR   strr[   rs   rw   rm   r~   r   r   r:   typerd   r   r   r   listr   r&   r&   r&   r'   <module>   s    
6)) 
&E
<