o
    )i                      @   s   d dl Z d dlmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
 ddlmZmZmZmZ er9d dlmZmZ ndZdZeeZG dd	 d	eZdS )
    N)TYPE_CHECKINGOptional)init_logger)DEFAULT_MAX_NUM_BATCHED_TOKENS   )DeviceCapabilityPlatformPlatformEnum_Backend)ModelConfig
VllmConfigc                   @   s  e Zd ZU ejZdZeed< dZ	eed< dZ
eed< dZeed< dZeed	< d
Zeed< edededejdee dededededefddZedejddfddZe	d>dedee fddZed>dedefdd Zedefd!d"Zed>dedefd#d$Zed%ee defd&d'Zed(d) Zed*eddfd+d,Z ed-d. Z!e	d?deej"j# de$fd/d0Z%edefd1d2Z&edefd3d4Z'edefd5d6Z(edefd7d8Z)ed9e*defd:d;Z+edefd<d=Z,dS )@XPUPlatformxpudevice_namedevice_typeXPUdispatch_keyZGPUray_device_keyZccldist_backendZZE_AFFINITY_MASKdevice_control_env_varselected_backend	head_sizedtypekv_cache_dtype
block_sizeuse_v1use_mlahas_sinkreturnc	           	      C   s>   |d ur|t jkrtd| tj}|stdtd dS )NzCannot use %s backend on XPU.zXPU backend only supports V1.z+Using Flash Attention backend on V1 engine.z;vllm.v1.attention.backends.flash_attn.FlashAttentionBackend)r
   ZIPEXloggerinfoenvsVLLM_USE_V1
ValueError)	clsr   r   r   r   r   r   r   r    r%   ^/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/platforms/xpu.pyget_attn_backend_cls#   s   
z XPUPlatform.get_attn_backend_clsdeviceNc                 C   s   t j| dS )z:
        Set the device for the current platform.
        N)torchr   
set_devicer$   r(   r%   r%   r&   r*   0   s   zXPUPlatform.set_devicer   	device_idc                 C   s   d S Nr%   r$   r,   r%   r%   r&   get_device_capability7   s   z!XPUPlatform.get_device_capabilityc                 C   s   t j|S r-   )r)   r   get_device_namer.   r%   r%   r&   r0   @   s   zXPUPlatform.get_device_namec                 C      dS )Nz4vllm.lora.punica_wrapper.punica_xpu.PunicaWrapperXPUr%   r$   r%   r%   r&   get_punica_wrapperD      zXPUPlatform.get_punica_wrapperc                 C   s   t j|}|jS r-   )r)   r   Zget_device_propertiesZtotal_memory)r$   r,   Zdevice_propsr%   r%   r&   get_device_total_memoryH   s   z#XPUPlatform.get_device_total_memoryenforce_eagerc                 C   r1   NTr%   )r$   r6   r%   r%   r&   is_async_output_supportedM   r4   z%XPUPlatform.is_async_output_supportedc                 C   s   t  S r-   )r)   Zno_gradr2   r%   r%   r&   inference_modeQ   s   zXPUPlatform.inference_modevllm_configc                 C   s|  |j }|j}|r|jd u rd|_tjr&|d ur&|jjs&ddlm} |j|j	_
|d ur8|jtjkr8|  s8tj|_ddlm} |j	}|jd u sN|j |jkrWtd |j|_|j}d|_|jd u ro|jdkrkd|_n2d	|_n.|jd
krtjdkrdtjd< td n|jdkr|jd	kr|jdkrtd|j d|_|r|jrtd d|j_ d|j_!t"|jj#t$|j_%d S d S d S )N@   r   )CompilationLevel)CUDAGraphModez?[XPU] CUDA graph is not supported on XPU, disabling cudagraphs.z#vllm.v1.worker.xpu_worker.XPUWorkerr   ZrayunimpZspawnVLLM_WORKER_MULTIPROC_METHODz7Please use spawn as start method if you want to use mp.Zexternal_launcherzI%s is not supported on XPU, fallback to ray distributed executor backend.z`MLA is enabled on a non-GPU platform; forcing chunked prefill and prefix caching to be disabled.F)&cache_configmodel_configr   r!   r"   r6   vllm.configr<   ZNO_COMPILATIONcompilation_configlevelr   r)   Zbfloat16device_support_bf16Zfloat16r=   Zcudagraph_modeZmax_cudagraph_modeNONEr   r    parallel_configZ
worker_clsZdistributed_executor_backendZ
world_sizer@   osenvironwarningr   Zscheduler_configZenable_chunked_prefillZchunked_prefill_enabledmaxZmax_model_lenr   Zmax_num_batched_tokens)r$   r:   rA   rB   r<   r=   rD   rH   r%   r%   r&   check_and_update_configU   sn   










z#XPUPlatform.check_and_update_configc                 C   r1   r7   r%   r2   r%   r%   r&   is_pin_memory_available   r4   z#XPUPlatform.is_pin_memory_availablec                 C   s   t j| t j|S r-   )r)   r   Zreset_peak_memory_statsZmax_memory_allocatedr+   r%   r%   r&   get_current_memory_usage   s   z$XPUPlatform.get_current_memory_usagec                 C   s2   |    }|  rtd dS td| dS )NzFIntel Arc A770 have bfloat16 accuracy known issue, fallback to float16FzlDevice name %s supports bfloat16. Please file an issue if you encounter any accuracy problems with bfloat16.T)r0   loweris_client_gpu_a770r   rK   r    r$   r   r%   r%   r&   rF      s   
zXPUPlatform.device_support_bf16c                 C      |    }|ddkS )Nzdata center gpur   r0   rP   countrR   r%   r%   r&   is_data_center_gpu      zXPUPlatform.is_data_center_gpuc                 C   rS   )NZa770r   rT   rR   r%   r%   r&   rQ      rW   zXPUPlatform.is_client_gpu_a770c                 C   r1   )NzFvllm.distributed.device_communicators.xpu_communicator.XpuCommunicatorr%   r2   r%   r%   r&   get_device_communicator_cls   r4   z'XPUPlatform.get_device_communicator_clsrB   c                 C   r1   r7   r%   )r$   rB   r%   r%   r&   supports_v1   r4   zXPUPlatform.supports_v1c                 C   s
   t j S r-   )r)   r   device_countr2   r%   r%   r&   rZ      s   
zXPUPlatform.device_count)r   r-   )-__name__
__module____qualname__r	   r   Z_enumr   str__annotations__r   r   r   r   r   classmethodr
   intr)   r   r   boolr'   r(   r*   r   r/   r0   r3   r5   r8   r9   r   rM   rN   typesZDevicefloatrO   rF   rV   rQ   rX   r   rY   rZ   r%   r%   r%   r&   r      s   
 

D

r   )rI   typingr   r   r)   Z	vllm.envsr!   Zvllm.loggerr   Z
vllm.utilsr   Z	interfacer   r   r	   r
   rC   r   r   r[   r   r   r%   r%   r%   r&   <module>   s   