o
    pi
,                     @  s   d dl mZ d dlmZ d dlmZmZ d dlmZ ddl	m
Z
mZmZ ddlmZ ddlmZ dd	lmZmZmZ erJd d
lmZ d dlmZmZ g Zeddddd	d4d5ddZ			d6d7d%d&Zd8d)d*Z	+		,d9d:d2d3ZdS );    )annotations)TYPE_CHECKING)_C_opstensor)
deprecated   )check_dtype
check_typecheck_variable_and_dtype)LayerHelper)Variable)convert_np_dtype_to_dtype_corein_dynamic_or_pir_mode)Tensor)DataLayout2D	DTypeLikez2.5.2zpaddle.diag_embed   z<diag_embed in paddle.nn.functional will be removed in future)ZsinceZ	update_tolevelreasoninputr   offsetintdim1dim2returnc                 C  s   t | |||S )N)r   
diag_embed)r   r   r   r    r   e/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/paddle/nn/functional/extension.pyr   ,   s   	r   Nint64xmaxlen
int | Nonedtyper   name
str | Nonec                 C  s   t  r#t|tjjtjfst|}|du rd}t| ||}d|_	|S t
di t }|j|d}d| gi}d|ji}|durMt|trI||d< n||d	< |jd|d
|i|d d|_	|S )a  
    **SequenceMask Layer**

    This layer outputs a mask according to the input :code:`x` and
    :code:`maxlen` with data type of :code:`dtype`.

    Supposing :code:`x` is a Tensor with shape [d_1, d_2, ..., d_n], the
    :code:`y` is a mask with shape [d_1, d_2, ..., d_n, maxlen], where:

    .. math::

        y(i_1, i_2,..., i_n, j) = (j < x(i_1, i_2,..., i_n))

    .. code-block:: text

        Case:

        Consider input:
            x = [3, 1, 1, 0]    max_len = 4

        then we get out:
            mask = [[1, 1, 1, 0],
                    [1, 0, 0, 0],
                    [1, 0, 0, 0],
                    [0, 0, 0, 0]]

    Args:
        x (Variable): Input tensor of sequence_mask layer, \
            whose elements are integers less than :code:`maxlen`. \
            Tensor with shape [d_1, d_2, ..., d_n].
        maxlen (int|None, optional): Maximum length of the sequence. If :code:`maxlen` \
                           is None, it would be replace with :math:`max(x)`.
        dtype (np.dtype|paddle.dtype|str, optional): Data type of the output, \
             ``int64`` by default.
        name(str|None, optional): For detailed information, please refer \
            to :ref:`api_guide_Name`. Usually name is no need to set and \
            None by default.

    Returns:
            Tensor, The output sequence mask. Tensor with shape [d_1, d_2, ..., d_n, maxlen] \
            and data type of :code:`dtype`. The data type should be bool, float32, float64, int8, \
            int32 or int64.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> lengths = paddle.to_tensor([10, 9, 8])
            >>> mask = paddle.nn.functional.sequence_mask(lengths)

            >>> print(mask)
            Tensor(shape=[3, 10], dtype=int64, place=Place(cpu), stop_gradient=True,
            [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
             [1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
             [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]])

    Nr   Tsequence_maskr%   XZ	out_dtypeZMaxLenTensorr#   Ytypeinputsoutputsattrs)r(   )r   
isinstancer   ZVarDescZVarTypeZDataTyper   r   r(   Zstop_gradientr   locals"create_variable_for_type_inferencer%   r   	append_op)r"   r#   r%   r&   outhelperr.   r0   r   r   r    r(   8   s*   A



r(   idsparentsc                 C  s   | j dkr	td| j |j krtdt r&t|jdddgd t| |S tdi t }t	| dddgd t	|dddgd |j
| jd	}|jd| |d
d|id |S )a	  
    To be used after beam search. After beam search, we get selected ids at
    each time step and the corresponding parents in the search tree. Both ids
    and parents have the layout :attr:`[max_time, batch_size, beam_size]`. Then
    :attr:`gather_tree` is used to backtrace from the last time step and
    generate the full sequences by collecting selected ids.

    Here is an example:

    .. code-block:: text

            Given:
                ids = [[[2 2]
                        [6 1]]
                       [[3 9]
                        [6 1]]
                       [[0 1]
                        [9 0]]]
                parents = [[[0 0]
                            [1 1]]
                           [[1 0]
                            [1 0]]
                           [[0 0]
                            [0 1]]]

            Then:
                gather_tree(ids, parents)
                         = [[[2 2]
                             [1 6]]
                            [[3 3]
                             [6 1]]
                            [[0 1]
                             [9 0]]]

    Args:
        ids(Tensor): A Tensor with shape :attr:`[length, batch_size, beam_size]`
            and data type :attr:`int32` or :attr:`int64`. It contains the selected
            ids of all time steps.
        parents(Tensor): A Tensor with the same shape and data type as :attr:`ids`,
            It contains the parents corresponding to selected ids when searching
            among beams.

    Returns:
            A Tensor with the same shape and data type as :attr:`ids`. \
            It contains the full sequences. The sequences are collected from \
            :attr:`ids` by backtracing according to :attr:`parents`.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> ids = paddle.to_tensor([[[2, 2], [6, 1]], [[3, 9], [6, 1]], [[0, 1], [9, 0]]])

            >>> parents = paddle.to_tensor([[[0, 0], [1, 1]], [[1, 0], [1, 0]], [[0, 0], [0, 1]]])

            >>> final_sequences = paddle.nn.functional.gather_tree(ids, parents)
            >>> [[[2, 2], [1, 6]], [[3, 3], [6, 1]], [[0, 1], [9, 0]]]
            >>> final_sequences = paddle.nn.functional.gather_tree(ids, parents)
            >>> print(final_sequences)
            Tensor(shape=[3, 2, 2], dtype=int64, place=Place(cpu), stop_gradient=True,
            [[[2, 2],
              [1, 6]],
             [[3, 3],
              [6, 1]],
             [[0, 1],
              [9, 0]]])


    r   zLThe input ids must be a 3D tensor with shape [length, batch_size, beam_size]z4The ids's shape must be the same as parents' shape. r8   Zint32r!   gather_treer7   r)   )ZIdsZParentsOut)r-   r.   r/   N)r9   )ndim
ValueErrorr   r   r%   r   r9   r   r2   r
   r3   r4   )r7   r8   r6   r5   r   r   r    r9      s*   
Gr9         ?NCHWseg_numshift_ratiofloatdata_formatDataLayout2D | strc                 C  s   |dvrt d| dt rt| |||S tdi t }t| dg dd t|dtd t|dt	d |j
| jd	}t|tsFtd
|jdd| id|i|||dd |S )a  

    **Temporal Shift Operator**

    Calculate the temporal shifting features for Input(X).

    Input(X) should be in shape of [N*T, C, H, W] or [N*T, H, W, C], while
    N is the batch size, T is the temporal segment number specified by
    :attr:`seg_num`, C is the channel number, H and W is the height and
    width of features.

    Temporal Shifting is calculated as follows when data format is NCHW:

    Step 1: Reshape Input(X) to [N, T, C, H, W].

    Step 2: Pad 0 to reshaping result in the 2nd(T) dimension with
    padding width as 1 on each side, padding result will be in shape
    of [N, T+2, C, H, W].

    Step 3: Assume :attr:`shift_ratio` is :math:`1/4`, slice padding
    result as follows:

    $$
    slice1 = x[:, :T, :C/4, :, :]
    $$
    $$
    slice2 = x[:, 2:T+2, C/4:C/2, :, :]
    $$
    $$
    slice3 = x[:, 1:T+1, C/2:, :, :]
    $$

    Step 4: Concatenate three slices along the 3rd(C) dimension and
    reshape result to [N*T, C, H, W].

    For details of temporal shifting, please refer to paper:
    `Temporal Shift Module <http://arxiv.org/abs/1811.08383>`_ .

    Args:
        x(Tensor): ${x_comment}
        seg_num(int): ${seg_num_comment}
        shift_ratio(float): ${shift_ratio_comment}
        name(str|None, optional): For detailed information, please refer
                             to :ref:`api_guide_Name`. Usually name is no need to set and
                             None by default.
        data_format(str, optional): Data format that specifies the layout of input.
            It can be "NCHW" or "NHWC". Default: "NCHW".

    Returns:
        out(Tensor): The temporal shifting result is a tensor with the
        same shape and same data type as the input.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn.functional as F

            >>> input = paddle.randn([6, 4, 2, 2])
            >>> out = F.temporal_shift(x=input, seg_num=2, shift_ratio=0.2)
    )r>   ZNHWCzJAttr(data_format) should be 'NCHW' or 'NHWC'. Received Attr(data_format): .temporal_shiftr"   )Zfloat16Zuint16Zfloat32Zfloat64r?   r@   r)   zseg_num must be int type.r*   r:   )r?   r@   rB   r,   N)rE   )r<   r   r   rE   r   r2   r
   r	   r   rA   r3   r%   r1   	TypeErrorr4   )r"   r?   r@   r&   rB   r6   r5   r   r   r    rE      s<   D

rE   )r   r   r   )
r   r   r   r   r   r   r   r   r   r   )Nr!   N)
r"   r   r#   r$   r%   r   r&   r'   r   r   )r7   r   r8   r   r   r   )r=   Nr>   )r"   r   r?   r   r@   rA   r&   r'   rB   rC   r   r   )
__future__r   typingr   Zpaddler   r   Zpaddle.utilsr   Zbase.data_feederr   r	   r
   Zbase.layer_helperr   Zcommon_ops_importr   Z	frameworkr   r   r   r   Zpaddle._typingr   r   __all__r   r(   r9   rE   r   r   r   r    <module>   s:   
]e