o
    )iI                     @   st  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlZd dlZd dlmZmZ d dlmZmZ d dlmZ e	rod dlmZmZ d d	lmZ d d
lmZ d dl m!Z! d dl"m#Z# ndZdZdZdZdZ!dZ#ee$Z%de&fddZ'G dd de j(Z)G dd de j(Z*G dd de j(Z+G dd deZ,G dd dZ-G dd de-Z.dS )    N)	timedelta)uname)TYPE_CHECKINGAny
NamedTupleOptionalUnion)PrefixStoreProcessGroup)ProcessorInputs
PromptType)init_logger)ModelConfig
VllmConfig)LoRARequest)PoolingParams)SamplingParams)FlexibleArgumentParserreturnc                   C   s   dd t  v S )NZ	microsoft )joinr   lower r   r   d/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/platforms/interface.pyin_wsl$   s   r   c                   @   s   e Zd Ze Ze Ze Ze Ze Z	e Z
e Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze ZdS )_BackendN)__name__
__module____qualname__enumautoZ
FLASH_ATTNZFLASH_ATTN_VLLM_V1ZTRITON_ATTN_VLLM_V1ZXFORMERSZ
ROCM_FLASHZROCM_AITER_MLAZROCM_AITER_MLA_VLLM_V1ZROCM_AITER_FA
TORCH_SDPAZ
FLASHINFERZFLASHINFER_VLLM_V1Z
TRITON_MLAZTRITON_MLA_VLLM_V1ZFLASHMLA_VLLM_V1ZFLASHMLAZCUTLASS_MLAZPALLASZPALLAS_VLLM_V1ZIPEXZDUAL_CHUNK_FLASH_ATTNZDIFFERENTIAL_FLASH_ATTNZNO_ATTENTIONZFLEX_ATTENTIONZ	TREE_ATTNZXFORMERS_VLLM_V1r   r   r   r   r   )   s4    r   c                   @   sL   e Zd Ze Ze Ze Ze Ze Z	e Z
e Ze ZdS )PlatformEnumN)r   r   r   r   r    CUDAROCMTPUXPUCPUNEURONOOTUNSPECIFIEDr   r   r   r   r"   E   s    r"   c                   @   s4   e Zd Ze Ze Ze Ze Ze Z	dS )CpuArchEnumN)
r   r   r   r   r    X86ARMPOWERPCOTHERUNKNOWNr   r   r   r   r+   P   s    r+   c                   @   s:   e Zd ZU eed< eed< defddZdefddZdS )	DeviceCapabilitymajorminorr   c                 C   s   | j  d| j S )N.)r2   r3   selfr   r   r   as_version_str\   s   zDeviceCapability.as_version_strc                 C   s.   d| j   krdk sJ  J | jd | j  S )z
        Express device capability as an integer `<major><minor>`.

        It is assumed that the minor version is always a single digit.
        r   
   )r3   r2   r5   r   r   r   to_int_   s   zDeviceCapability.to_intN)r   r   r   int__annotations__strr7   r9   r   r   r   r   r1   X   s
   
 r1   c                   @   s  e Zd ZU eed< eed< eed< dZeed< dZeed< dZeed	< d
Z	eed< dZ
eed< g Zee ed< g Zee ed< dZee ed< edeej fddZdefddZdefddZdefddZdefddZdefddZdefddZdefd d!Zd"edefd#d$Zdefd%d&Zdefd'd(Z e!d)efd*d+Z"e!dd-ede#fd.d/Z$e!d0e#d1ed2ejd3ee d4ed5ed6ed7edefd8d9Z%e!	:dd)edee& fd;d<Z'e!	:dd=e(e)eef ef d)edefd>d?Z*e!	:dd=e(e)eef ef d)edefd@dAZ+e!dd)edefdBdCZ,e!dd)edefdDdEZ-e!dd)edefdFdGZ.e!dHee defdIdJZ/e!dKdL Z0e!ddMee ddfdNdOZ1e!dPej2ddfdQdRZ3e!	ddSee4 ddfdTdUZ5e!dVe6ddfdWdXZ7e!dYeddfdZd[Z8e!d\eddfd]d^Z9e!de:fd_d`Z;e!defdadbZ<e!	ddPeej=j> de?fdcddZ@e!defdedfZAe!d2ejde)e?e?f fdgdhZBe!defdidjZCe!defdkdlZDe!defdmdnZEe!defdodpZFe!defdqdrZGe!defdsdtZHe!dejfdudvZIe!defdwdxZJe!dyeKdefdzd{ZLe!dyeKdefd|d}ZMe!defd~dZNe!deOde(ePeQf deRddfddZSdefddZTdefddZUe!dd)edefddZVe!defddZWe!dedeXdededeYdeZfddZ[e!d3edefddZ\dS )Platform_enumdevice_namedevice_typer'   dispatch_key ray_device_keyZ'VLLM_DEVICE_CONTROL_ENV_VAR_PLACEHOLDERdevice_control_env_varZinductorsimple_compile_backenddist_backendsupported_quantizationadditional_env_varsN_global_graph_poolr   c                 C   s   t jt jt jgS )z6Returns the supported dtypes for the current platform.)torchZbfloat16Zfloat16Zfloat32r5   r   r   r   supported_dtypes   s   zPlatform.supported_dtypesc                 C      | j tjkS Nr>   r"   r#   r5   r   r   r   is_cuda      zPlatform.is_cudac                 C   rL   rM   )r>   r"   r$   r5   r   r   r   is_rocm   rP   zPlatform.is_rocmc                 C   rL   rM   )r>   r"   r%   r5   r   r   r   is_tpu   rP   zPlatform.is_tpuc                 C   rL   rM   )r>   r"   r&   r5   r   r   r   is_xpu   rP   zPlatform.is_xpuc                 C   rL   rM   )r>   r"   r'   r5   r   r   r   is_cpu   rP   zPlatform.is_cpuc                 C   rL   rM   )r>   r"   r(   r5   r   r   r   	is_neuron   rP   zPlatform.is_neuronc                 C   rL   rM   )r>   r"   r)   r5   r   r   r   is_out_of_tree   rP   zPlatform.is_out_of_tree
prompt_lenc                 C      t jS rM   )sysmaxsize)r6   rW   r   r   r   get_max_output_tokens   s   zPlatform.get_max_output_tokensc                 C   s   | j tjtjfv S )z1Stateless version of [torch.cuda.is_available][].)r>   r"   r#   r$   r5   r   r   r   is_cuda_alike   s   zPlatform.is_cuda_alikec                 C   rL   rM   rN   r5   r   r   r   is_sleep_mode_available   rP   z Platform.is_sleep_mode_available	device_idc                 C   sB   | j tjv rtj| j  dkrtj| j  d}|| }t|S |S )NrB   ,)rD   osenvironsplitr:   )clsr^   Z
device_idsZphysical_device_idr   r   r   device_id_to_physical_device_id   s   z(Platform.device_id_to_physical_device_idF
support_fac                 C   rX   rM   )r   r!   )rc   re   r   r   r   get_vit_attn_backend   s   zPlatform.get_vit_attn_backendselected_backend	head_sizedtypekv_cache_dtype
block_sizeuse_v1use_mlahas_sinkc	           	      C      dS )z,Get the attention backend class of a device.rB   r   )	rc   rg   rh   ri   rj   rk   rl   rm   rn   r   r   r   get_attn_backend_cls      zPlatform.get_attn_backend_clsr   c                 C   ro   )z:Stateless version of [torch.cuda.get_device_capability][].Nr   rc   r^   r   r   r   get_device_capability   rq   zPlatform.get_device_capability
capabilityc                 C   s6   | j |d}|du rdS t|tr||kS | |kS )a.  
        Test whether this platform is compatible with a device capability.

        The `capability` argument can either be:

        - A tuple `(major, minor)`.
        - An integer `<major><minor>`. (See
        [`DeviceCapability.to_int`][vllm.platforms.interface.DeviceCapability.to_int])
        r^   NFrs   
isinstancetupler9   rc   rt   r^   Zcurrent_capabilityr   r   r   has_device_capability      
zPlatform.has_device_capabilityc                 C   s6   | j |d}|du rdS t|tr||kS | |kS )a3  
        Test whether this platform has exactly the specified device capability.

        The `capability` argument can either be:

        - A tuple `(major, minor)`.
        - An integer `<major><minor>`. (See
        [`DeviceCapability.to_int`][vllm.platforms.interface.DeviceCapability.to_int])
        ru   NFrv   ry   r   r   r   is_device_capability   r{   zPlatform.is_device_capabilityc                 C      t )zGet the name of a device.NotImplementedErrorrr   r   r   r   get_device_name     zPlatform.get_device_namec                 C   r}   )z.Get the uuid of a device, e.g. the PCI bus ID.r~   rr   r   r   r   get_device_uuid  r   zPlatform.get_device_uuidc                 C   r}   )z*Get the total memory of a device in bytes.r~   rr   r   r   r   get_device_total_memory  r   z Platform.get_device_total_memoryenforce_eagerc                 C   r}   )zF
        Check if the current platform supports async output.
        r~   )rc   r   r   r   r   is_async_output_supported     z"Platform.is_async_output_supportedc                 C   s   t jddS )a  A device-specific wrapper of `torch.inference_mode`.

        This wrapper is recommended because some hardware backends such as TPU
        do not support `torch.inference_mode`. In such a case, they will fall
        back to `torch.no_grad` by overriding this method.
        T)mode)rJ   inference_moderc   r   r   r   r     s   zPlatform.inference_modeseedc                 C   s0   |durt | tj | t| dS dS )z
        Set the seed of each random module.
        `torch.manual_seed` will set seed on all devices.

        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
        N)randomr   nprJ   Zmanual_seed)rc   r   r   r   r   seed_everything&  s
   
zPlatform.seed_everythingdevicec                 C   r}   )z:
        Set the device for the current platform.
        r~   rc   r   r   r   r   
set_device3  r   zPlatform.set_deviceparserc                 C   ro   )a  
        Do some pre-registration or update action for the current platform.

        This function is called before global VllmConfig is initialized or cli
        arguments are parsed. It's used for out-of-tree platforms to register or
        update the configuration.

        For example, the out-of-tree quantization config can be imported and
        registered here dynamically.
        Nr   )rc   r   r   r   r   pre_register_and_update:  s   z Platform.pre_register_and_updatevllm_configc                 C   ro   )ac  
        Check and update the configuration for the current platform.

        It can raise an exception if the configuration is not compatible with
        the current platform, or it can update the configuration to make it
        compatible with the current platform.

        The config is passed by reference, so it can be modified in place.
        Nr   )rc   r   r   r   r   check_and_update_configJ  s   z Platform.check_and_update_config
model_archc                 C   ro   )a  
        Verify whether the current platform supports the specified model
        architecture.

        - This will raise an Error or Warning based on the model support on
        the current platform.
        - By default all models are considered supported.
        Nr   )rc   r   r   r   r   verify_model_archW  s   
zPlatform.verify_model_archquantc                 C   s.   | j r|| j vrt| d| j ddS dS )zW
        Verify whether the quantization is supported by the current platform.
        z, quantization is currently not supported in r4   N)rG   
ValueErrorr?   )rc   r   r   r   r   verify_quantizationc  s   
zPlatform.verify_quantizationc                 C   sT   t   }|dv rtjS |ds|drtjS |dr"tjS |r'tjS tj	S )z
        Determine the CPU architecture of the current system.
        Returns CpuArchEnum indicating the architecture type.
        )x86_64amd64i386i686armZaarchppc)
platformmachiner   r+   r,   
startswithr-   r.   r/   r0   )rc   r   r   r   r   get_cpu_architecturen  s   
zPlatform.get_cpu_architecturec                 C   s   t  r
td dS dS )z?Checks whether pin memory is available on the current platform.zPUsing 'pin_memory=False' as WSL is detected. This may slow down the performance.FT)r   loggerwarningr   r   r   r   is_pin_memory_available  s   
z Platform.is_pin_memory_availablec                 C   r}   )z3
        Return the memory usage in bytes.
        r~   r   r   r   r   get_current_memory_usage  s   z!Platform.get_current_memory_usagec                 C   r}   )zA
        Return the punica wrapper for current platform.
        r~   r   r   r   r   get_punica_wrapper  r   zPlatform.get_punica_wrapperc                 C   s   t dt dfS )zE
        Return the platform specific values for (-inf, inf)
        z-infinf)float)rc   ri   r   r   r   get_infinity_values  s   zPlatform.get_infinity_valuesc                 C   ro   )zF
        Checks if the platform allows inplace memory updates
        Tr   r   r   r   r   can_update_inplace  r   zPlatform.can_update_inplacec                 C   ro   )zK
        Returns how much padding the LoRA logits need for kernels
           r   r   r   r   r   get_lora_vocab_padding_size  r   z$Platform.get_lora_vocab_padding_sizec                 C   ro   )zW
        Get device specific communicator class for distributed communication.
        zUvllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBaser   r   r   r   r   get_device_communicator_cls  r   z$Platform.get_device_communicator_clsc                 C   ro   )zI
        Returns whether the current platform supports MX types.
        Fr   r   r   r   r   supports_mx  r   zPlatform.supports_mxc                 C   ro   )zJ
        Returns whether the current platform supports FP8 types.
        Fr   r   r   r   r   supports_fp8  r   zPlatform.supports_fp8c                 C   ro   )a  
        Returns whether the preferred FP8 type is FNUZ on the current platform.

        There are two representations of FP8, OCP FP8 and FNUZ FP8.
        The OCP specification can be found at https://tinyurl.com/b7jvwpft.
        The FNUZ specification can be found at https://tinyurl.com/5n6hwwu5.

        AMD's MI300 and MI325 have native hardware support for FNUZ. All other
        hardware has converged on the OCP FP8 standard.
        Fr   r   r   r   r   is_fp8_fnuz  s   zPlatform.is_fp8_fnuzc                 C   rX   )z
        Returns the preferred FP8 type on the current platform.

        See the documentation for is_fp8_fnuz for details.
        )rJ   Zfloat8_e4m3fnr   r   r   r   	fp8_dtype  s   zPlatform.fp8_dtypec                 C   s0   ddl m} ddlm} | j}|jp|jdkS )zS
        Whether to use allgather in LogitsProcessor to gather the logits.
        r   N)get_current_vllm_configZexternal_launcher)Z	vllm.envsenvsvllm.configr   parallel_configZVLLM_USE_V1Zdistributed_executor_backend)rc   r   r   r   r   r   r   use_all_gather  s   zPlatform.use_all_gathermodel_configc                 C   ro   )zjReturns whether the current platform can support v1 for the supplied
        model configuration.
        Fr   rc   r   r   r   r   supports_v1  r   zPlatform.supports_v1c                 C   s
   |  |S )zN
        Returns whether the current platform supports v1 by default.
        )r   r   r   r   r   
default_v1  s   
zPlatform.default_v1c                 C   ro   )zR
        Returns if custom allreduce is supported on the current platform
        Fr   r   r   r   r   use_custom_allreduce  r   zPlatform.use_custom_allreducepromptparamsprocessed_inputsc                 C   ro   )z6Raises if this request is unsupported on this platformNr   )rc   r   r   r   r   r   r   validate_request  s    zPlatform.validate_requestkeyc                 C   s>   t t| jd }|d urt||rt ||S td| j| d S )Nz1Current platform %s does not have '%s' attribute.)getattrrJ   r@   hasattrr   r   )r6   r   r   r   r   r   __getattr__  s   
zPlatform.__getattr__c                 C   s    | j }|jdu r|  |_|jS )zE
        Return the global graph pool for the this platform.
        N)	__class__rI   Zgraph_pool_handle)r6   rc   r   r   r   get_global_graph_pool  s   

zPlatform.get_global_graph_poolc                 C   r}   )zO
        Returns the total number of compute units (CU) on single GPU.
        r~   rr   r   r   r   get_cu_count  r   zPlatform.get_cu_countc                 C   ro   )zB
        Get static graph wrapper class for static graph.
        z=vllm.compilation.base_static_graph.AbstractStaticGraphWrapperr   r   r   r   r   get_static_graph_wrapper_cls  r   z%Platform.get_static_graph_wrapper_clsbackendprefix_store
group_rank
group_sizetimeoutc                 C   s   t d| )zI
        Init platform-specific torch distributed process group.
        z'Unsupported torch distributed backend: )RuntimeError)rc   r   r   r   r   r   r   r   r   #stateless_init_device_torch_dist_pg&  s   z,Platform.stateless_init_device_torch_dist_pgc                 C   ro   )zU
        Returns if the kv_cache_dtype is supported by the current platform.
        Fr   )rc   rj   r   r   r   is_kv_cache_dtype_supported4  r   z$Platform.is_kv_cache_dtype_supported)F)r   rM   )]r   r   r   r"   r;   r<   rA   rC   rD   rE   rF   rG   listrH   rI   r   r   propertyrJ   ri   rK   boolrO   rQ   rR   rS   rT   rU   rV   r:   r[   r\   r]   classmethodrd   r   rf   rp   r1   rs   r   rx   rz   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r+   r   r   typesZDevicer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r
   r   r   r   r   r   r   r=   i   sJ  
 

	



		r=   c                   @   s   e Zd ZejZdZdS )UnspecifiedPlatformrB   N)r   r   r   r"   r*   r>   r@   r   r   r   r   r   <  s    r   )/r   r`   r   r   rY   datetimer   r   typingr   r   r   r   r   numpyr   rJ   Ztorch.distributedr	   r
   Zvllm.inputsr   r   Zvllm.loggerr   r   r   r   Zvllm.lora.requestr   Zvllm.pooling_paramsr   Zvllm.sampling_paramsr   Z
vllm.utilsr   r   r   r   r   Enumr   r"   r+   r1   r=   r   r   r   r   r   <module>   sH      V