o
    )i1                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlZd dlmZ d dlmZ ddlmZmZmZmZ eeZe
rQd d	lmZ ndZdd
dZeG dd dZG dd deZdS )    N)	dataclass)	find_spec)TYPE_CHECKINGOptional)init_logger)DEFAULT_MAX_NUM_BATCHED_TOKENS   )CpuArchEnumPlatformPlatformEnum_Backend)
VllmConfigc                 C   s4   t tdrtt| S t dkrt S td)Nsched_getaffinityDarwinzUnsupported OS)hasattroslenr   platformsystem	cpu_countNotImplementedError)pid r   ^/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/platforms/cpu.pyget_max_threads   s
   
r   c                   @   sZ   e Zd ZU dZeed< dZeed< dZeed< ede	defddZ
ed	efd
dZdS )LogicalCPUInfoidphysical_core	numa_nodevaluereturnc                 C   s(   zt |}W |S  ty   d}Y |S w )Nr   )int	Exception)clsr    Z	int_valuer   r   r   _int+   s   
zLogicalCPUInfo._intobj_dictc                 C   sZ   |  d}|  d}|  d}|d u s+|d u s+|d u s+tt|t|t|dS | S )Ncpucorenoder   r   r   )getr   r%   )r&   r   r   r   r   r   r   json_decoder3   s   


zLogicalCPUInfo.json_decoderN)__name__
__module____qualname__r   r"   __annotations__r   r   classmethodstrr%   staticmethoddictr,   r   r   r   r   r   %   s   
 r   c                   @   s  e Zd ZU ejZdZeed< dZ	eed< dZ
eed< dZeed< edeej fd	d
Zed5dedefddZedededejdee dededededefddZed5dedefddZedejddfddZedee defd d!Zed"d# Zed$eddfd%d&Zedeee ee  f fd'd(Z!edefd)d*Z"edefd+d,Z#edefd-d.Z$edefd/d0Z%edefd1d2Z&edefd3d4Z'dS )6CpuPlatformr'   device_namedevice_typeCPUdispatch_keyZgloodist_backendr!   c                 C   sP   |   tjkrtjtjgS tjdr |   tj	kr tj
tjgS tjtj
tjgS )Ndarwin)get_cpu_architecturer	   POWERPCtorchbfloat16Zfloat32sysr   
startswithARMZfloat16)selfr   r   r   supported_dtypesI   s   zCpuPlatform.supported_dtypesr   	device_idc                 C      dS )Nr'   r   )r$   rE   r   r   r   get_device_nameW      zCpuPlatform.get_device_nameselected_backend	head_sizedtypekv_cache_dtype
block_sizeuse_v1use_mlahas_sinkc	           	      C   s@   |r|t jkrtd| |rtdtd |stddS )NzCannot use %s backend on CPU.zMLA is not supported on CPU.zUsing Torch SDPA backend.zCPU backend only supports V1.z4vllm.v1.attention.backends.cpu_attn.TorchSDPABackend)r   Z
TORCH_SDPAloggerinfor   
ValueError)	r$   rI   rJ   rK   rL   rM   rN   rO   rP   r   r   r   get_attn_backend_cls[   s   
z CpuPlatform.get_attn_backend_clsc                 C   sH   dd l m} ddlm} |j}|d u rd| }td |S ||9 }|S )Nr   )	GiB_bytes   zaEnvironment variable VLLM_CPU_KVCACHE_SPACE (GiB) for CPU backend is not set, using 4 by default.)Z	vllm.envsenvs
vllm.utilsrU   ZVLLM_CPU_KVCACHE_SPACErQ   Zwarning_once)r$   rE   rW   rU   Zkv_cache_spacer   r   r   get_device_total_memoryi   s   z#CpuPlatform.get_device_total_memorydeviceNc                 C   s   t j| dS )z:
        Set the device for the current platform.
        N)r>   r'   
set_device)r$   rZ   r   r   r   r[   y   s   zCpuPlatform.set_deviceenforce_eagerc                 C   rF   )NFr   )r$   r\   r   r   r   is_async_output_supported   rH   z%CpuPlatform.is_async_output_supportedc                 C   s   t  S )N)r>   Zno_gradr$   r   r   r   inference_mode   s   zCpuPlatform.inference_modevllm_configc                 C   s  |j }|d ur
d|_|j}tdd u}|r!|jd u r!|rdnd|_|s1|jdkr1td|j d|j}|js:|jrC|j	dkrCtd|j	d	krPd
|_	t
d |j	dkrh|d urh|jtjkrht
d tj|_t |_|j}|jdkr|jd ur|jdkrt
d|j d|_|jdkrd|_ddlm} g |j_|j}|jj|jkrtjdddkrd}	nd}	|j |_|	|_!|j"#ddddd |j$rdg|_%|j&d ur|j'|_|j(j)dksJ dtjd< t*t+ tjd< t*t, tjd< dtjd < t-d!d"}
d#|
v rdtjd$< dtjd%< d&tjd'< d&tjd(< d&tjd)< t*|jj.tjd*< |d urF|j/rHt
0d+ d|j_1d|j_t2|jj3t4|j_5d S d S d S ),NTZintel_extension_for_pytorch      z--block-size=z% requires intel_extension_for_pytorchautozXChunked-prefill and prefix-cache on the CPU backend is not compatible with FP8 KV cache.Zfp8_e4m3Zfp8_e5m2zECPU backend doesn't support fp8_e4m3 KV cache type, cast to fp8_e5m2.zQFP8 KV cache on the CPU backend only does not support fp16 for now, cast to bf16.r   mpzH%s is not supported on CPU, fallback to mp distributed executor backend.z#vllm.v1.worker.cpu_worker.CPUWorkerr   )CompilationLevelZVLLM_CPU_CI_ENV0eagerZinductorF)ZdceZsize_assertsZnan_assertsZepilogue_fusionnoner'   ZspawnZVLLM_WORKER_MULTIPROC_METHODZNUMEXPR_MAX_THREADSZOMP_NUM_THREADS1ZTORCHINDUCTOR_COMPILE_THREADSZ
LD_PRELOAD zlibiomp5.soZKMP_BLOCKTIMEZ
KMP_TPAUSEz	dist,distZKMP_FORKJOIN_BARRIER_PATTERNZKMP_PLAIN_BARRIER_PATTERNZKMP_REDUCTION_BARRIER_PATTERNZLOCAL_WORLD_SIZEz`MLA is enabled on a non-GPU platform; forcing chunked prefill and prefix caching to be disabled.)6model_configZdisable_cascade_attncache_configr   rM   RuntimeErrorscheduler_configZchunked_prefill_enabledZenable_prefix_cachingZcache_dtyperQ   warningrK   r>   Zhalfr?   r5   rY   Zcpu_kvcache_space_bytesparallel_configZ
world_sizeZdistributed_executor_backendZ
worker_clsvllm.configre   compilation_configZcudagraph_capture_sizeslevelZ	PIECEWISEr   environr+   ZDYNAMO_ONCEbackendZinductor_compile_configupdateZuse_inductorZ
custom_opsZlora_configZNO_COMPILATIONZdevice_configr7   r2   r   Zget_num_threadsgetenvZtensor_parallel_sizerO   rR   Zenable_chunked_prefillmaxZmax_model_lenr   Zmax_num_batched_tokens)r$   r`   rk   rl   Zipex_availablern   rp   re   rr   ru   Zld_prealod_strr   r   r   check_and_update_config   s   






	










z#CpuPlatform.check_and_update_configc                    s   t  dksJ tjdddd}tj|tjdd }dd |D }t	d	  fd
d|D }t
 }|D ]}||j q4t|}||fS )NLinuxzlscpu -J -e=CPU,CORE,NODET)shelltext)object_hookZcpusc                 S   s$   g | ]}d |j |j|jfvr|qS )r   r*   .0xr   r   r   
<listcomp>  s
    z@CpuPlatform.get_allowed_cpu_memory_node_list.<locals>.<listcomp>r   c                    s   g | ]	}|j  v r|qS r   )r   r~   Zallowed_cpu_id_listr   r   r   "  s    )r   r   
subprocesscheck_outputjsonloadsr   r,   r   r   setaddr   sorted)r$   Zlscpu_outputZlogical_cpu_listZallowed_numa_nodesr   Zallowed_numa_nodes_listr   r   r    get_allowed_cpu_memory_node_list  s,   

z,CpuPlatform.get_allowed_cpu_memory_node_listc                 C   s   t d dS )Nz#Pin memory is not supported on CPU.F)rQ   ro   r^   r   r   r   is_pin_memory_available.  s   
z#CpuPlatform.is_pin_memory_availablec                 C   rF   )Nz4vllm.lora.punica_wrapper.punica_cpu.PunicaWrapperCPUr   r^   r   r   r   get_punica_wrapper3  rH   zCpuPlatform.get_punica_wrapperc                 C   rF   )zW
        Get device specific communicator class for distributed communication.
        zFvllm.distributed.device_communicators.cpu_communicator.CpuCommunicatorr   r^   r   r   r   get_device_communicator_cls7     z'CpuPlatform.get_device_communicator_clsc                 C   rF   )NTr   r^   r   r   r   supports_structured_output>  rH   z&CpuPlatform.supports_structured_outputc                 C   rF   )zjReturns whether the current platform can support v1 for the supplied
        model configuration.
        Tr   )r$   rk   r   r   r   supports_v1B  r   zCpuPlatform.supports_v1c                 C   s&   |   }| |o|tjtjtjfv S )zqReturns whether the current platform can use v1 by default for the
        supplied model configuration.
        )r<   r   r	   ZX86r=   rB   )r$   rk   archr   r   r   
default_v1I  s   zCpuPlatform.default_v1r   )(r-   r.   r/   r   r8   Z_enumr6   r2   r0   r7   r9   r:   propertylistr>   rK   rD   r1   r"   rG   r   r   boolrT   rY   rZ   r[   r]   r_   r   ry   tupler   r   r   r   r   r   r   r   r   r   r   r   r5   B   sj   
 

 r5   r   )r   r   r   r   r@   dataclassesr   importlib.utilr   typingr   r   r>   Zvllm.loggerr   rX   r   Z	interfacer	   r
   r   r   r-   rQ   rq   r   r   r   r5   r   r   r   r   <module>   s(   
	