o
    * i                     @  sx  U d dl mZ d dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
Z
d dlmZmZ d dlmZmZmZmZ ddlmZmZ erd d	lmZ d d
l
mZmZ d dlmZ d dlmZ eej ej!ej"f Z#eej$ej%ej&f Z'd dl
m(Z(m)Z) d dl*m+Z+ ee(e)e,e-f Z.de/d< g dZ0da1dcddZ2ddddZ3deddZddddZ4dfd"d#Zdgd%d&Z5dhd)d*Z6did+d,Z7djd-d.Z8dkdld1d2Z9dmd4d5Z:dnd7d8Z;dmd9d:Z<dnd;d<Z=	dkdod?d@Z>dpdBdCZ?dqdEdFZ@dkdrdGdHZAdkdrdIdJZBdkdsdKdLZCdkdsdMdNZDdkdrdOdPZEdkdrdQdRZFG dSdT dTZGG dUdV dVZHdkdtdXdYZIdud[d\ZJG d]d^ d^ZKG d_d` d`ZLdkdvdadbZMdS )w    )annotationsN)TYPE_CHECKINGUnion)	TypeAlias)core	framework)is_compiled_with_cinnis_compiled_with_cudais_compiled_with_distributeis_compiled_with_rocm   )cudaxpu)TracebackType)IPUPlaceXPUPlace)	PlaceLike)Place)	CUDAPlaceCustomPlace)_customDevicePropertiesr   _CustomPlaceLike)get_cudnn_version
set_device
get_devicer   r   is_compiled_with_xpuis_compiled_with_ipur   r	   r   r
   is_compiled_with_custom_deviceget_all_device_typeget_all_custom_device_typeget_available_deviceget_available_custom_deviceget_device_propertiesStreamEventcurrent_stream
set_streamstream_guarddevice_guardsynchronizedevice_typestrreturnboolc                 C  
   t | S )a  

    Whether paddle was built with Paddle_CUSTOM_DEVICE .

    Args:
        device_type (str): the registered device type, like "npu".

    Return:
        bool, ``True`` if CustomDevice is supported, otherwise ``False``.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> support_npu = paddle.device.is_compiled_with_custom_device("npu")

    )r   r   )r*    r/   b/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/device/__init__.pyr   ]      
r   c                   C     t  S )a  

    Whether paddle was built with WITH_IPU=ON to support Graphcore IPU.

    Returns (bool): `True` if IPU is supported, otherwise `False`.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> support_ipu = paddle.is_compiled_with_ipu()

    )r   r   r/   r/   r/   r0   r   r      r   	_IPUPlacec                   C  r2   )a  

    Return a Graphcore IPU Place

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:IPU)

            >>> import paddle
            >>> paddle.device.set_device('ipu')
            >>> place = paddle.device.IPUPlace()

    )r   r   r/   r/   r/   r0   r      s   r   c                   C  r2   )a  

    Whether paddle was built with WITH_XPU=ON to support Baidu Kunlun

    Returns (bool): whether paddle was built with WITH_XPU=ON

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> support_xpu = paddle.device.is_compiled_with_xpu()

    )r   r   r/   r/   r/   r0   r      r3   r   dev_idint	_XPUPlacec                 C  r.   )a<  

    Return a Baidu Kunlun Place

    Args:
        dev_id(int): Baidu Kunlun device id

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:XPU)

            >>> import paddle
            >>> paddle.device.set_device('xpu')
            >>> place = paddle.device.XPUPlace(0)

    )r   r   )r5   r/   r/   r0   r      r1   r   
int | Nonec                  C  s8   t  sdS tdu rtt  } | atdk rdS | S tS )a  

    This function return the version of cudnn. the return value is int which represents the
    cudnn version. For example, if it return 7600, it represents the version of cudnn is 7.6.

    Returns:
        int: A int value which represents the cudnn version. If cudnn version is not installed, it return None.

    Examples:
        .. code-block:: python

            >>> import paddle

            >>> cudnn_version = paddle.device.get_cudnn_version()



    Nr   )r   r	   _cudnn_versionr6   cudnn_version)r:   r/   r/   r0   r      s   r   devicer   c           
   
   C  sH  t | ts| S |  }|dr|dd}| t v r7td|  dd	d}t
|d }t| |}|S |dkrAt }|S |dksI|d	kr\t sQtd
ttj j}|S |dkr~t shtdtdd	d}t
|d }t|}|S |dkrt stdt }|S td|ptd|}td|}|rt std| d| 	dd}|d }t
|}t|}|rt std| d| 	dd}|d }t
|}t|}|s"|s"| 	dd}|d }	|	t v r|d }t
|}t|	|}|S tdddd dd	ddgt D |S )Nr   gpuZFLAGS_selected_s0,r   cpuZdcuzLThe device should not be 'gpu', since PaddlePaddle is not compiled with CUDAr   zKThe device should not be 'xpu', since PaddlePaddle is not compiled with XPUZFLAGS_selected_xpusZipuzKThe device should not be 'ipu', since PaddlePaddle is not compiled with IPUzgpu:\d+zdcu:\d+zxpu:\d+zThe device should not be z., since PaddlePaddle is not compiled with CUDA:r   z-, since PaddlePaddle is not compiled with XPUz3The device must be a string which is like 'cpu', {}z, c                 s  s"    | ]}d | d| dV  qdS )'z', 'z:x'Nr/   ).0xr/   r/   r0   	<genexpr>%  s
    
z$_convert_to_place.<locals>.<genexpr>Znpu)
isinstancer+   lower
startswithreplacer   r   osgetenvsplitr6   r   CPUPlacer	   
ValueErrorr   paddledistributedZParallelEnvr5   r   r   r   r   rematchformatjoin)
r;   Zlower_deviceZselected_devices	device_idplaceZselected_xpusZavailable_gpu_deviceZavailable_xpu_deviceZdevice_info_listr*   r/   r/   r0   _convert_to_place   s   

IG@
70




rW   c                 C  s   t | }t| |S )a]  

    Paddle supports running calculations on various types of devices, including CPU, GPU, XPU, NPU and IPU.
    They are represented by string identifiers. This function can specify the global device
    which the OP will run.

    Args:
        device(str): This parameter determines the specific running device.
            It can be ``cpu``, ``gpu``, ``xpu``, ``npu``, ``gpu:x``, ``xpu:x``, ``npu:x`` and ``ipu``,
            where ``x`` is the index of the GPUs, XPUs or NPUs.

    Returns:
        Place,the Place to set.

    Examples:

        .. code-block:: python

            >>> import paddle

            >>> paddle.device.set_device("cpu")
            >>> x1 = paddle.ones(name='x1', shape=[1, 2], dtype='int32')
            >>> x2 = paddle.zeros(name='x2', shape=[1, 2], dtype='int32')
            >>> data = paddle.stack([x1,x2], axis=1)

    )rW   r   _set_expected_placer;   rV   r/   r/   r0   r   4  s   
r   c                  C  s   d} t  }t|tjrd} | S t|tjr"| }dt| } | S t|tjr4| }dt| } | S t|tj	rHt
 }d|d  d} | S t|tjr`| }| }|d t| } | S td	| d
)a  

    This function can get the current global device of the program is running.
    It's a string which is like 'cpu', 'gpu:x', 'xpu:x' and 'npu:x'. if the global device is not
    set, it will return a string which is 'gpu:x' when cuda is available or it
    will return a string which is 'cpu' when cuda is not available.

    Examples:

        .. code-block:: python

            >>> import paddle
            >>> device = paddle.device.get_device()

     r@   gpu:zxpu:zipus:{0-r   }rA   zThe device specification z is invalid)r   _current_expected_place_rF   r   rM   r   get_device_idr+   r   r   Zget_ipu_device_countr   get_device_typerN   )r;   rV   rU   Znum_devicesr*   r/   r/   r0   r   T  s.   r   dev_type
str | Nonec                 C  s   | du r:t  rttdrt }|S d}|S ttdr6t }|r2ttdr.t|d nd}|S d}|S td| dkrSt  rOttdrKt nd}|S tdttd	rmt| rmttdrit| }|S d}|S td
|  )a"  
    Return the number of devices available.
    Args:
        dev_type (str, optional): Device type string, e.g., 'gpu', 'npu', etc.
        If None, will return the number of CUDA devices if available,
        otherwise the first available custom device count.
    Returns:
        int: the number of devices available.
    Examples:
        .. code-block:: python
            >>> import paddle
            >>> paddle.device.device_count()
            >>> paddle.device.device_count('gpu')
            >>> paddle.device.device_count('npu')
    Nget_cuda_device_countr   r   get_custom_device_countz1Paddle is not compiled with GPU or Custom Device.r<   z Paddle is not compiled with GPU.r   z(Unsupported or unavailable device type: )	rO   r	   hasattrr   rb   r   rc   rN   r   )r`   numZcustom_typesr/   r/   r0   device_count{  sZ   


		rf   	list[str]c                   C  r2   )aM  

    Get all available device types.

    Returns:
        A list of all available device types.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.device.get_all_device_type()

            >>> # Case 1: paddlepaddle-cpu package installed, and no custom device registered.
            >>> # Output: ['cpu']

            >>> # Case 2: paddlepaddle-gpu package installed, and no custom device registered.
            >>> # Output: ['cpu', 'gpu']

            >>> # Case 3: paddlepaddle-cpu package installed, and custom device 'CustomCPU' is registered.
            >>> # Output: ['cpu', 'CustomCPU']

            >>> # Case 4: paddlepaddle-gpu package installed, and custom device 'CustomCPU' and 'CustomGPU' is registered.
            >>> # Output: ['cpu', 'gpu', 'CustomCPU', 'CustomGPU']

    )r   r   r/   r/   r/   r0   r        r   list[str] | Nonec                   C  r2   )a-  

    Get all available custom device types.

    Returns:
        A list of all available custom device types.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.device.get_all_custom_device_type()

            >>> # Case 1: paddlepaddle-gpu package installed, and no custom device registered.
            >>> # Output: None

            >>> # Case 2: paddlepaddle-gpu package installed, and custom device 'CustomCPU' and 'CustomGPU' is registered.
            >>> # Output: ['CustomCPU', 'CustomGPU']

    )r   r   r/   r/   r/   r0   r        r   c                   C  r2   )ak  

    Get all available devices.

    Returns:
        A list of all available devices.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.device.get_available_device()

            >>> # Case 1: paddlepaddle-cpu package installed, and no custom device registered.
            >>> # Output: ['cpu']

            >>> # Case 2: paddlepaddle-gpu package installed, and no custom device registered.
            >>> # Output: ['cpu', 'gpu:0', 'gpu:1']

            >>> # Case 3: paddlepaddle-cpu package installed, and custom device 'CustomCPU' is registered.
            >>> # Output: ['cpu', 'CustomCPU']

            >>> # Case 4: paddlepaddle-gpu package installed, and custom device 'CustomCPU' and 'CustomGPU' is registered.
            >>> # Output: ['cpu', 'gpu:0', 'gpu:1', 'CustomCPU', 'CustomGPU:0', 'CustomGPU:1']

    )r   r    r/   r/   r/   r0   r      rh   r    c                   C  r2   )a4  

    Get all available custom devices.

    Returns:
       A list of all available custom devices.

    Examples:
        .. code-block:: python

            >>> import paddle
            >>> paddle.device.get_available_custom_device()

            >>> # Case 1: paddlepaddle-gpu package installed, and no custom device registered.
            >>> # Output: None

            >>> # Case 2: paddlepaddle-gpu package installed, and custom device 'CustomCPU' and 'CustomGPU' is registered.
            >>> # Output: ['CustomCPU', 'CustomGPU:0', 'CustomGPU:1']

    )r   r!   r/   r/   r/   r0   r!     rj   r!   _CustomPlaceLike | Noner   c                 C  s   d}| dur@t | tr8| d}|dkr| }d}n/| d| }| |d d }| s3td| dt|}ntd|  d	td|  d	|d
krStjj	|S t
|s`td| dt
	||S )a?  

    Return the properties of given device.

    Args:
        device(|paddle.CustomPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like npu:x' which to get the properties of the
            device from. If device is None, the device is the current device.
            Default: None.

    Returns:
       _customDeviceProperties: The properties of the device which include ASCII string
        identifying device, major compute capability, minor compute capability, global
        memory available and the number of multiprocessors on the device.

    Examples:
        .. code-block:: python

            >>> # import paddle
            >>> # paddle.device.set_device('npu')
            >>> # paddle.device.get_device_properties('npu:0')
            >>> # _customDeviceProperties(name='', major=0, minor=0, total_memory=0MB, multi_processor_count=0)

            >>> # paddle.device.get_device_properties('npu')
            >>> # _customDeviceProperties(name='', major=0, minor=0, total_memory=0MB, multi_processor_count=0)
    NrA   r   r   zInvalid device ID 'z:'. After colon must be digits only. Example: 'metax_gpu:0'zThe input: z is not expected. Because paddle.device.get_device_properties only support str. Please input appropriate device again!Example: 'metax_gpu:0'r<   z/PaddlePaddle is not compiled with support for 'zT' device. Please reinstall PaddlePaddle with Custom Device support to call this API.)rF   r+   rfindisdigitrN   r6   rO   r;   r   r"   r   r   )r;   Zdevice_nameZ	colon_idxrU   device_id_strr/   r/   r0   r"   '  s6   







r"   op_namec           	   	   C  s  | du rt  S t| tr[| }t  rd}nd}t  }t|dkr8| dkr+|d }n!td|  d|d  d|D ]}|d\}}t|| krK|}q:|du rZtd|  d	| nVt| t j	rhd}| 
 }nIt| t jrw|  }| 
 }n:t| tr| d
rt| dd }n'd| v r| dd\}}t|}ntd|  d| dtd|  d| d|dksJ d| dt  r|t k sJ d| dt  |S |t |k sJ d| d| dt | |S )a  
    Return the id of the given device. It is just a utility that will not be exposed to users.

    Args:
        device(paddle.CUDAPlace|paddle.CustomPlace|int|str): The device, the id of the device or
            the string name of device like 'gpu:x' or 'custom_device:x'.
            Default: None.

    Return:
        int: The id of the given device. If device is None, return the id of current device.
    Nr<   r   r   z
Device id z) not found in available_custom_devices: [z:0]rA   z( not found in available_custom_devices: r[      zThe current string z is not expected. Because zi only support string which is like 'gpu:x' or '<custom_device>:x'. Please input appropriate string again!zThe device type z only support int, str (format 'gpu:x' or '<custom_device>:x'), paddle.CUDAPlace or paddle.CustomPlace. Please input appropriate device again!z4The device id must be not less than 0, but got id = .zThe device id z exceeds gpu card number z	 exceeds z device card number )r   Zget_cuda_current_device_idrF   r6   r	   r!   lenrN   rL   r   r^   r   r_   r+   rH   rf   rc   )	r;   rp   rU   r*   Zavailable_custom_devicesdr`   r5   ro   r/   r/   r0   extract_device_idu  sl   








ru   Nonec                  C  s@   t j } t rt  dS t| d rt  dS td)a  
    Releases idle cached memory held by the allocator so that those can be used in other GPU
    application and visible in `nvidia-smi`. In most cases you don't need to use this function,
    Paddle does not release the memory back to the OS when you remove Tensors on the GPU,
    Because it keeps gpu memory in a pool so that next allocations can be done much faster.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')

            >>> tensor = paddle.randn([512, 512, 512], "float64")
            >>> del tensor
            >>> paddle.device.empty_cache()
    r   zThe API paddle.device.empty_cache is not supported in CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU or custom device support to call this API.N)	rO   r;   r   r   r	   Zcuda_empty_cacher   Zdevice_empty_cacherN   )custom_devicesr/   r/   r0   empty_cache  s   
rx   c                 C  P   d}t j }t s|rt|d std| dt| |d}td|S )a  
    Return the peak size of memory that is allocated to tensor of the given device. This

    Note:
        The size of memory allocated to tensor is 256-byte aligned in Paddle, which may larger than the memory size that tensor actually need.
        For instance, a float32 0-D Tensor with shape [] will take up 256 bytes memory, even though storing a float32 data requires only 4 bytes.

    Args:
        device(paddle.CUDAPlace|paddle.CustomPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Return:
        int: The peak size of memory that is allocated to tensor of the given device, in bytes.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')  # or '<custom_device>'

            >>> max_memory_allocated_size = paddle.device.max_memory_allocated(paddle.CUDAPlace(0))
            >>> max_memory_allocated_size = paddle.device.max_memory_allocated(0)
            >>> max_memory_allocated_size = paddle.device.max_memory_allocated("gpu:0")
    z"paddle.device.max_memory_allocatedr   The API } is not supported in CPU-only PaddlePaddle. Please reinstall PaddlePaddle with GPU or custom device support to call this API.rp   	Allocated	rO   r;   r   r   r	   r   rN   ru   Zdevice_memory_stat_peak_valuer;   namerw   rU   r/   r/   r0   max_memory_allocated     

r   c                 C  ry   )a  
    Return the peak size of memory that is held by the allocator of the given device.

    Args:
        device(paddle.CUDAPlace|paddle.CustomPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Return:
        int: The peak size of memory that is held by the allocator of the given device, in bytes.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')  # or '<custom_device>'

            >>> max_memory_reserved_size = paddle.device.max_memory_reserved(paddle.CUDAPlace(0))
            >>> max_memory_reserved_size = paddle.device.max_memory_reserved(0)
            >>> max_memory_reserved_size = paddle.device.max_memory_reserved("gpu:0")
    z!paddle.device.max_memory_reservedr   rz   r{   r|   Reservedr~   r   r/   r/   r0   max_memory_reserved     

r   c                 C  T   d}t j }t s|rt|d std| dt| |d}td| dS )a  
    Reset the peak size of memory that is allocated to tensor of the given device.

    Args:
        device(paddle.CUDAPlace|paddle.CustomPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')  # or '<custom_device>'

            >>> paddle.device.reset_max_memory_allocated(paddle.CUDAPlace(0))
            >>> paddle.device.reset_max_memory_allocated(0)
            >>> paddle.device.reset_max_memory_allocated("gpu:0")
    z(paddle.device.reset_max_memory_allocatedr   rz   r{   r|   r}   N	rO   r;   r   r   r	   r   rN   ru   Z#device_memory_stat_reset_peak_valuer   r/   r/   r0   reset_max_memory_allocated2     

r   c                 C  r   )a  
    Reset the peak size of memory that is held by the allocator of the given device.

    Args:
        device(paddle.CUDAPlace|paddle.CustomPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')  # or '<custom_device>'

            >>> paddle.device.reset_max_memory_reserved(paddle.CUDAPlace(0))
            >>> paddle.device.reset_max_memory_reserved(0)
            >>> paddle.device.reset_max_memory_reserved("gpu:0")
    z'paddle.device.reset_max_memory_reservedr   rz   r{   r|   r   Nr   r   r/   r/   r0   reset_max_memory_reservedW  r   r   c                 C  ry   )a  
    Return the current size of memory that is allocated to tensor of the given device.

    Note:
        The size of memory allocated to tensor is 256-byte aligned in Paddle, which may be larger than the memory size that tensor actually need.
        For instance, a float32 0-D Tensor with shape [] will take up 256 bytes memory, even though storing a float32 data requires only 4 bytes.

    Args:
        device(paddle.CUDAPlace|paddle.CustomPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Return:
        int: The current size of memory that is allocated to tensor of the given device, in bytes.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')  # or '<custom_device>'

            >>> memory_allocated_size = paddle.device.memory_allocated(paddle.CUDAPlace(0))
            >>> memory_allocated_size = paddle.device.memory_allocated(0)
            >>> memory_allocated_size = paddle.device.memory_allocated("gpu:0")
    zpaddle.device.memory_allocatedr   rz   r{   r|   r}   	rO   r;   r   r   r	   r   rN   ru   Z device_memory_stat_current_valuer   r/   r/   r0   memory_allocated|  r   r   c                 C  ry   )ap  
    Return the current size of memory that is held by the allocator of the given device.

    Args:
        device(paddle.CUDAPlace|int|str|None, optional): The device, the id of the device or
            the string name of device like 'gpu:x'. If device is None, the device is the current device.
            Default: None.

    Return:
        int: The current size of memory that is held by the allocator of the given device, in bytes.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle
            >>> paddle.device.set_device('gpu')  # or '<custom_device>'

            >>> memory_reserved_size = paddle.device.memory_reserved(paddle.CUDAPlace(0))
            >>> memory_reserved_size = paddle.device.memory_reserved(0)
            >>> memory_reserved_size = paddle.device.memory_reserved("gpu:0")
    zpaddle.device.memory_reservedr   rz   r{   r|   r   r   r   r/   r/   r0   memory_reserved  r   r   c                   @  sr   e Zd ZU dZded< ded< ded< 							dd ddZd!d"ddZd#ddZd$ddZd%ddZ	d&ddZ
dS )'r$   a  

    A device event wrapper around StreamBase.

    Args:
        device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)|None): Which device the stream run on. If device is None, the device is the current device. Default: None.
            It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevice,
            where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
        enable_timing (bool, optional): indicates if the event should measure time, default is False
        blocking (bool, optional): if True, ``wait`` will be blocking, default is False
        interprocess (bool): if True, the event can be shared between processes, default is False

    Returns:
        Event: The event.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
            >>> import paddle

            >>> paddle.set_device('custom_cpu')
            >>> e1 = paddle.device.Event()
            >>> e2 = paddle.device.Event('custom_cpu')
            >>> e3 = paddle.device.Event('custom_cpu:0')
            >>> e4 = paddle.device.Event(paddle.CustomPlace('custom_cpu', 0))

    PlaceLike | Noner;   r-   enable_timing_InitEventBase
event_baseNFblockinginterprocessr,   rv   c                 C  s   |d u rt j | _nt|trt j|| _n|| _t  r0t| jt jr0t	
|||| _d S t  rBt| jt jrBt	 | _d S t| jt jr[t	| j | j |||| _d S tddt j )Ndevice should be gpu, xpu, {}r?   )rO   r   r]   r;   rF   r+   rW   r	   r   r   	CUDAEventr   r   r   XPUEventr   CustomDeviceEventr_   r^   	TypeErrorrS   rT   r   )selfr;   r   r   r   r/   r/   r0   __init__  s8   




zEvent.__init__streamStream | Nonec                 C  s$   |du r	t | j}| j|j dS )a\  

        Records the event in a given stream.

        Args:
            stream(Stream, optional): The given stream. By default, stream is None,
            event will be recorded in current_stream.

        Returns:
            None.

        Examples:
            .. code-block:: python

                >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
                >>> import paddle

                >>> paddle.set_device('custom_cpu')
                >>> e = paddle.device.Event()
                >>> e.record()

                >>> s = paddle.device.Stream()
                >>> e.record(s)

        N)r%   r;   r   recordstream_baser   r   r/   r/   r0   r     s   
zEvent.recordc                 C  
   | j  S )a  

        Checks if all work currently captured by event has completed.

        Returns:
            bool: Whether all work currently captured by event has completed.

        Examples:
            .. code-block:: python

                >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
                >>> import paddle

                >>> paddle.set_device('custom_cpu')
                >>> e = paddle.device.Event()
                >>> e.record()
                >>> e.query()

        )r   queryr   r/   r/   r0   r   7  s   
zEvent.query	end_eventr6   c                 C  s   | j |j S )aF  

        Returns the time elapsed in milliseconds after the event was
        recorded and before the end_event was recorded.

        Returns:
            int: The time.

        Examples:
            .. code-block:: python

                >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
                >>> import paddle

                >>> paddle.set_device('custom_cpu')
                >>> e1 = paddle.device.Event()
                >>> e1.record()

                >>> e2 = paddle.device.Event()
                >>> e2.record()
                >>> e1.elapsed_time(e2)

        )r   elapsed_time)r   r   r/   r/   r0   r   M  s   zEvent.elapsed_timec                 C     | j   dS )a4  

        Waits for the event to complete.
        Waits until the completion of all work currently captured in this event.
        This prevents the CPU thread from proceeding until the event completes.

        Returns:
            None.

        Examples:
            .. code-block:: python

                >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
                >>> import paddle

                >>> paddle.set_device('custom_cpu')
                >>> e = paddle.device.Event()
                >>> e.record()
                >>> e.synchronize()

        N)r   r)   r   r/   r/   r0   r)   g  s   zEvent.synchronize'core.CUDAEvent | core.CustomDeviceEventc                 C  s   | j S N)r   r   r/   r/   r0   __repr__  s   zEvent.__repr__)NFFF)
r;   r   r   r-   r   r-   r   r-   r,   rv   r   r   r   r,   rv   r,   r-   )r   r$   r,   r6   r,   rv   )r,   r   )__name__
__module____qualname____doc____annotations__r   r   r   r   r)   r   r/   r/   r/   r0   r$     s   
 (


r$   c                      s   e Zd ZU dZded< ded< 			d*d+ddZd,ddZd-ddZd.d/ddZd0ddZ	d1ddZ
edd  Zd2 fd#d$Zd3d%d&Zd4d(d)Z  ZS )5r#   aa  

    A device stream wrapper around StreamBase.

    Args:
        device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)|None): Which device the stream run on. If device is None, the device is the current device. Default: None.
            It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevice,
            where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).
        priority(int, optional): priority of the CUDA stream. Can be either
            1 (high priority) or 2 (low priority). By default, streams have
            priority 2.

    Returns:
        Stream: The stream.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
            >>> import paddle

            >>> paddle.set_device('custom_cpu')
            >>> s1 = paddle.device.Stream()
            >>> s2 = paddle.device.Stream('custom_cpu')
            >>> s3 = paddle.device.Stream('custom_cpu:0')
            >>> s4 = paddle.device.Stream(paddle.CustomPlace('custom_cpu', 0))

    _InitStreamBaser   r   r;   N   r   priorityr6   _InitStreamBase | Noner,   rv   c                 C  s  |d urt |tjtjtjfr|| _|j| _d S td|d u r't	j
 | _nt |tr4t	j|| _n|| _t	 rNt | jt	jrNt| j || _d S t	 rdt | jt	jrdt| j | _d S t | jt	jr}tj| j | j |dd| _d S tddt	j )Nz?stream_base should be CUDAStream, XPUStream, CustomDeviceStreamF)r   r   r?   )rF   r   
CUDAStreamCustomDeviceStream	XPUStreamr   rV   r;   r   rO   r   r]   r+   rW   r	   r   r^   r   r   r   r_   rS   rT   r   )r   r;   r   r   r/   r/   r0   r     sL   




zStream.__init__eventr$   c                 C  s   | j |j dS )aV  

        Makes all future work submitted to the stream wait for an event.

        Args:
            event (Event): an event to wait for.

        Returns:
            None.

        Examples:
            .. code-block:: python

                >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
                >>> import paddle

                >>> paddle.set_device('custom_cpu')
                >>> s1 = paddle.device.Stream()
                >>> s2 = paddle.device.Stream()
                >>> e = paddle.device.Event()
                >>> e.record(s1)
                >>> s2.wait_event(e)

        N)r   
wait_eventr   r   r   r/   r/   r0   r        zStream.wait_eventr   c                 C  s   | j |j  dS )a~  

        Synchronizes with another stream.
        All future work submitted to this stream will wait until all kernels
        submitted to a given stream at the time of call complete.

        Args:
            stream (Stream): a stream to synchronize.

        Returns:
            None.

        Examples:
            .. code-block:: python

                >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
                >>> import paddle

                >>> paddle.set_device('custom_cpu')
                >>> s1 = paddle.device.Stream()
                >>> s2 = paddle.device.Stream()
                >>> s1.wait_stream(s2)

        N)r   wait_streamr   r/   r/   r0   r     r   zStream.wait_streamEvent | Nonec                 C  s    |du r	t | j}||  |S )aP  

        Records an event.

        Args:
            event (Event, optional): event to record. If not given, a new one
            will be allocated.

        Returns:
            Event: Recorded event.

        Examples:
            .. code-block:: python

                >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
                >>> import paddle

                >>> paddle.set_device('custom_cpu')
                >>> s = paddle.device.Stream()
                >>> e1 = s.record_event()

                >>> e2 = paddle.device.Event()
                >>> s.record_event(e2)

        N)r$   r;   r   r   r/   r/   r0   record_event  s   

zStream.record_eventr-   c                 C  r   )a  

        Checks if all the work submitted has been completed.

        Returns:
            bool: Whether all kernels in this stream are completed.

        Examples:
            .. code-block:: python

                >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
                >>> import paddle

                >>> paddle.set_device('custom_cpu')
                >>> s = paddle.device.Stream()
                >>> s.query()

        )r   r   r   r/   r/   r0   r   +  s   
zStream.queryc                 C  r   )a  

        Wait for all the kernels in this stream to complete.

        Returns:
            None.

        Examples:
            .. code-block:: python

                >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
                >>> import paddle

                >>> paddle.set_device('custom_cpu')
                >>> s = paddle.device.Stream()
                >>> s.synchronize()

        N)r   r)   r   r/   r/   r0   r)   @  s   zStream.synchronizec                 C  sF   t | jtjrt| jjS t | jtjrt| jjS t| jj	S r   )
rF   r   r   r   ctypesZc_void_pZcuda_streamr   Z
xpu_streamZ
raw_streamr   r/   r/   r0   _as_parameter_U  s
   zStream._as_parameter_or   c                   s   t |trt |S dS )NF)rF   r#   super__eq__)r   r   	__class__r/   r0   r   ^  s   
zStream.__eq__c                 C  s   t | j| jfS r   )hashr   r;   r   r/   r/   r0   __hash__c  s   zStream.__hash__r+   c                 C  s   d| j  d| jjddS )Nz<paddle.device.Stream device=z stream=z#x>)r;   r   valuer   r/   r/   r0   r   f  s   zStream.__repr__)Nr   N)r;   r   r   r6   r   r   r,   rv   )r   r$   r,   rv   )r   r#   r,   rv   r   )r   r   r,   r$   r   r   )r   r   r,   r-   )r,   r6   r,   r+   )r   r   r   r   r   r   r   r   r   r   r)   propertyr   r   r   r   __classcell__r/   r/   r   r0   r#     s$   
 
2




r#   r   c                 C  s   | du r
t j }nt| trt j| }n| }t  r,t|t jr,t	t
| dS t  r@t|t jr@t	t
| dS t|t jrSt	t
| | dS tddt j )a  

    Return the current stream by the device.

    Args:
        device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): The device which want to get stream from.  If device is None, the device is the current device. Default: None.
            It can be ``gpu``, ``gpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevice,
            where ``x`` is the index of the GPUs, CustomDevices. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n).

    Returns:
        Stream: The stream to the device.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
            >>> import paddle

            >>> paddle.set_device('custom_cpu')
            >>> s1 = paddle.device.current_stream()
            >>> s2 = paddle.device.current_stream("custom_cpu:0")
            >>> place = paddle.CustomPlace('custom_cpu', 0)
            >>> s3 = paddle.device.current_stream(place)

    N)r   r   r?   )rO   r   r]   rF   r+   r;   rW   r	   r   r#   r   Z_get_current_streamr^   r   r   Z_xpu_get_current_streamr   Z!_get_current_custom_device_streamr_   r   rS   rT   r   rY   r/   r/   r0   r%   j  s0   
r%   r   c                 C  s   t | jj}t rt| jjtjrt| j |S t	 r/t| jjtj
r/t| jj |S t| jjtjrIt| jj | jj | j |S tddtj )a  

    Set the current stream.

    Args:
        stream(Stream): The selected stream.

    Returns:
        Stream: The previous stream.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
            >>> import paddle

            >>> paddle.set_device('custom_cpu')
            >>> s = paddle.device.Stream()
            >>> paddle.device.set_stream(s)

    r   r?   )r%   r   rV   rO   r	   rF   r   r   Z_set_current_streamr   r   Z_xpu_set_current_streamidxr   Z!_set_current_custom_device_streamr_   r^   r   rS   rT   r;   r   )r   Zprev_streamr/   r/   r0   r&     s.   





r&   c                   @  s:   e Zd ZU dZded< ddddZdd	d
ZdddZdS )r'   aE  

    Notes:
        This API only supports dynamic graph mode currently.
    A context manager that specifies the current stream context by the given stream.

    Args:
        stream(Stream, optional): the selected stream. If stream is None, just yield.

    Returns:
        None.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
            >>> import paddle

            >>> paddle.set_device('custom_cpu')
            >>> s = paddle.device.Stream()
            >>> data1 = paddle.ones(shape=[20])
            >>> data2 = paddle.ones(shape=[20])
            >>> data3 = data1 + data2
            >>> with paddle.device.stream_guard(s):
            ...     s.wait_stream(paddle.device.default_stream()) # type: ignore[attr-defined]
            ...     data4 = data1 + data3

    r   r   Nr,   rv   c                 C  s
   || _ d S r   )r   r   r/   r/   r0   r     s   
zstream_guard.__init__c                 C  sn   | j }|d u r	d S t|j| _| jj|jkr1tjj | _tjj	|j t|j| _
t| d S t| d S r   )r   r%   r;   src_prev_streamrO   baser   r]   	tmp_placerX   dst_prev_streamr&   )r   
cur_streamr/   r/   r0   	__enter__  s   zstream_guard.__enter__exc_typetype[BaseException] | Noneexc_valBaseException | Noneexc_tbTracebackType | Nonec                 C  sV   | j }|d u r	d S | jj|jkr$t| j tjj| j	 t| j d S t| j d S r   )
r   r   r;   r&   r   rO   r   r   rX   r   )r   r   r   r   r   r/   r/   r0   __exit__  s   
zstream_guard.__exit__r   r   r   r   r   r   r   r   r   r,   rv   r   r   r   r   r   r   r   r   r/   r/   r/   r0   r'     s   
 
r'   c                   @  s@   e Zd ZU dZded< ded< dd	d
ZdddZdddZdS )r(   aW  

    Notes:
        This API only supports dynamic graph mode currently.

    A context manager that specifies the current device context by the given device.

    Args:
        device(PlaceLike): The specified device.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:GPU)
            >>> import paddle

            >>> # Set the global default device to CPU
            >>> paddle.set_device("cpu")
            >>> # Temporarily switch to GPU:0 using device_guard with string input
            >>> with paddle.device.device_guard("gpu:0"):
            ...     x = paddle.randn([4, 4])       # Create a Tensor on GPU:0
            ...     x = x.tanh() * 2               # Perform computation on GPU:0
            ...     print(x.place)                 # Check the device of the Tensor
            Place(gpu:0)

            >>> # Set the global default device to GPU:0
            >>> paddle.set_device("gpu:0")
            >>> # Temporarily switch to CPU using device_guard with Place object (CPUPlace)
            >>> cpu_place = paddle.CPUPlace()
            >>> with paddle.device.device_guard(cpu_place):
            ...     x = paddle.randn([4, 4])       # Create a Tensor on CPU
            ...     x = x.tanh() * 2               # Perform computation on CPU
            ...     print(x.place)
            Place(cpu)
    r   _target_place_original_placer;   r   r,   rv   c                 C  sH   t |trtj|| _d S t |tjjjr|| _d S t	dt
| )Nz_'device' must be a string or an instance of a subclass of paddle.base.libpaddle.Place, but got )rF   r+   rO   r;   rW   r   r   Z	libpaddler   rN   type)r   r;   r/   r/   r0   r   =  s   

zdevice_guard.__init__c                 C  s.   t j | _| j| jkrt j| j d S d S r   )rO   r   r]   r   r   rX   r   r/   r/   r0   r   H  s   zdevice_guard.__enter__r   r   r   r   r   r   c                 C  s"   | j | jkrtj| j  d S d S r   )r   r   rO   r   rX   )r   r   r   r   r/   r/   r0   r   M  s   zdevice_guard.__exit__N)r;   r   r,   rv   r   r   r   r/   r/   r/   r0   r(     s   
 $

r(   c                 C  s   | du r
t j }nt| trt j| }n| }t  r+t|t jr+t	
|  dS t  r>t|t jr>t	|  dS t|t jrPt	| |  dS tddt j )a  

    Wait for the compute on the given device to finish.

    Args:
        device(str|paddle.CUDAPlace(n)|paddle.XPUPlace(n)|paddle.CustomPlace(n)): The device which want to wait for.  If device is None, the device is the current device. Default: None.
            It can be ``gpu``, ``gpu:x``, ``xpu``, ``xpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevice,
            where ``x`` is the index of the GPUs, XPUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n).

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env:CUSTOM_DEVICE)
            >>> import paddle

            >>> paddle.set_device('custom_cpu')
            >>> paddle.device.synchronize()
            >>> paddle.device.synchronize("custom_cpu:0")
            >>> place = paddle.CustomPlace('custom_cpu', 0)
            >>> paddle.device.synchronize(place)

    Nr   r?   )rO   r   r]   rF   r+   r;   rW   r	   r   r   Z_device_synchronizer^   r   r   Z_xpu_device_synchronizer   Z_synchronize_custom_devicer_   r   rS   rT   r   rY   r/   r/   r0   r)   W  s$   
r)   )r*   r+   r,   r-   r   )r,   r4   )r5   r6   r,   r7   )r,   r8   )r;   r   r,   r   )r;   r+   r,   r   r   r   )r`   ra   r,   r6   )r,   rg   )r,   ri   )r;   rk   r,   r   )r;   r   rp   r+   r,   r6   r   )r;   rk   r,   r6   )r;   rk   r,   rv   )r;   r   r,   r#   )r   r#   r,   r#   )r;   r   r,   rv   )N
__future__r   r   rJ   rQ   typingr   r   Ztyping_extensionsr   rO   Zpaddle.baser   r   Zpaddle.base.frameworkr   r	   r
   r   rZ   r   r   typesr   r   r4   r   r7   Zpaddle._typing.device_liker   Zpaddle.base.corer   r   r   r   r   r   r   r   r   r   r   Zpaddle.base.libpaddler   r+   r6   r   r   __all__r9   r   r   r   r   rW   r   r   rf   r   r   r    r!   r"   ru   rx   r   r   r   r   r   r   r$   r#   r%   r&   r'   r(   r)   r/   r/   r/   r0   <module>   s   






!
X
 '
@



N
N+'%%+' 6 h
71CB