o
    pi                     @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ g Zdd	d
Zdd Zdd Zdd ZG dd dZdd Zdd Zdd ZdddZdS )    N)_C_ops)core)check_variable_and_dtype)EagerParamBase)LayerHelper)in_dynamic_or_pir_modec                 C   sl   t  r	t| |S t| dg dd td	i t }|j| j| jd}|j	dd| gid|gid|id |S )
aI  

    This OP takes in the Tensor :attr:`x` and change it to output with
    aclFormat with int value. This API is only used for Ascend NPU.

    Args:
        x(Tensor): An input N-D Tensor with data type bool, float16,
                   float32, float64, int32, int64, int16, int8, uint8.
        format(int): Storage data format of the output in aclFormat,
                     default value is -1.

    Returns:
        Tensor: A Tensor with acl storage format on Ascend NPU.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:NPU)
            >>> import paddle
            >>> paddle.device.set_device('npu')

            >>> x = paddle.ones(shape=[6])
            >>> y = paddle.incubate._npu_identity(x, 3) # ACL_FORMAT_NC1HWC0 = 3
            >>> print(y.shape)
            [1, 1, 1, 1, 16]
    x)	boolZint8Zuint8Zint16Zint32Zint64Zfloat16Zfloat32Zfloat64npu_identity)dtypestop_gradientoutformat)typeZinputsZoutputsattrsN)r   )
r   r   r   r   r   localsZ"create_variable_for_type_inferencer   r   Z	append_op)r	   r   helperr    r   j/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/paddle/incubate/tensor/manipulation.py_npu_identity   s&   r   c                 C   sj   t | trt| j}t| j| jfi |}||| }||fS t | tjr3t	j
 }||| }||fS dS )zt
    Helper to create a new destination tensor and call 'func(dst, src)'
    which is either offload or reload.
    N)
isinstancer   copydeepcopy__dict__shaper   paddleZTensorr   eager)
src_tensorfuncstate	new_paramtaskZnew_varbaser   r   r   _load_reload_implZ   s   



r#   c                  C   s8   t j } t  rt S tdd | D rdS t S )zf
    Constructs a new AsyncLoad object.
    It is used to load/reload data asynchronously on GPU.
    c                 s       | ]}t |V  qd S Nr   Zis_compiled_with_custom_device.0devr   r   r   	<genexpr>r       

z$create_async_load.<locals>.<genexpr>N)r   deviceget_all_custom_device_typeis_compiled_with_xpur   XpuAsyncLoadanyZ	AsyncLoad)custom_devicesr   r   r   create_async_loadj   s   
r2   c                   C   s   t  S )zf
    Constructs a new AsyncLoad object.
    It is used to load/reload data asynchronously on XPU.
    )r   r/   r   r   r   r   create_xpu_async_loadz   s   r3   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	_NoopAsyncTasku(   A dummy Task for sync‐fallback on XPU.c                 C   s   dS )NTr   selfr   r   r   is_completed      z_NoopAsyncTask.is_completedc                 C      d S r%   r   r5   r   r   r   cpu_wait   r8   z_NoopAsyncTask.cpu_waitc                 C   r9   r%   r   r5   r   r   r   xpu_wait   r8   z_NoopAsyncTask.xpu_waitN)__name__
__module____qualname____doc__r7   r:   r;   r   r   r   r   r4      s
    r4   c                 C   s   t  ot| do| j }t j }tdd |D o't| do'| j |v }|s,|r@| 	 }t j
| t  d}|t fS t| |jS )a  
    Loads the source tensor into the destination tensor asynchronously.

    Args:
        src_tensor (EagerParamBase|paddle.Tensor): The source tensor.
        async_load (core.AsyncLoad): The AsyncLoad object.

    Returns:
        tuple: A tuple containing two elements:
         - dest_tensor (EagerParamBase|paddle.Tensor): The destination tensor.
         - task (Task): The task that loads the source tensor into the destination tensor.
    placec                 s   r$   r%   r&   r'   r   r   r   r*      r+   z async_offload.<locals>.<genexpr>r@   )r   r.   hasattrr@   Zis_xpu_placer,   r-   r0   Zcustom_device_typecpu	to_tensornumpyZCPUPlacer4   r#   Zoffload)r   
async_loadZis_xpu_tensorr1   Zis_custom_tensorZhost_tensorr   r   r   r   async_offload   s$   

rG   c                 C   sN   t  r!t| dr!| j r!|  }t j|t dd}|t fS t	| |j
S )a  
    Reloads the source tensor into the destination tensor asynchronously.

    Args:
        src_tensor (EagerParamBase|paddle.Tensor): The source tensor.
        async_load (core.AsyncLoad): The AsyncLoad object.

    Returns:
        tuple: A tuple containing two elements:
         - dest_tensor (EagerParamBase|paddle.Tensor): The destination tensor.
         - task (Task): The task that reloads the source tensor into the destination tensor.
    r@   r   rA   )r   r.   rB   r@   Zis_cpu_placerE   rD   ZXPUPlacer4   r#   reload)r   rF   ZarrZxpur   r   r   async_reload   s   
rI   c                 C   sR   t | jdksJ dt |jdksJ d| j|jks J d||| |||S )ac  
    Offloading the source tensor into the destination tensor asynchronously with offset and size customized.

    Args:
        src_tensor (EagerParamBase|paddle.Tensor): The source tensor.
        dst_tensor (EagerParamBase|paddle.Tensor): The destination tensor.
        src_offset (int): The element offset of the source tensor.
        dst_offset (int): The element offset of the destination tensor.
        offload_size (int): The size of the data to be loaded.
        async_loader (core.AsyncLoad): The AsyncLoad object.

    Returns:
        task (Task): The task that operates partial offloading.
       zOnly support 1-D tensorzOnly support same dtype)lenr   r   Zoffload_with_offset)r   Z
dst_tensorZ
src_offsetZ
dst_offsetZoffload_sizeZasync_loaderr   r   r   async_offload_with_offset   s   
rL   TrJ   c                 C   sX   |rt d|i t j  t j|   dS t ddi t j  t jg  dS )z#
    Enable activation offload
    ZFLAGS_offload_retry_timesr   N)r   Z	set_flagsr   Zregister_offload_callbackZ!set_skip_offload_callback_tensors
parametersZclear_offload_callback)modelenableZretry_timesr   r   r   enable_activation_offload   s   

rP   )r   )TrJ   )r   r   r   Zpaddle.baser   Zpaddle.base.data_feederr   Zpaddle.base.frameworkr   Zpaddle.base.layer_helperr   Zpaddle.frameworkr   __all__r   r#   r2   r3   r4   rG   rI   rL   rP   r   r   r   r   <module>   s$   
<&