import enum
import os
from functools import lru_cache
from typing import TYPE_CHECKING, Optional

from vllm import envs
from vllm.logger import init_logger
from vllm.utils import DEFAULT_MAX_NUM_BATCHED_TOKENS

from .interface import Platform, PlatformEnum

if TYPE_CHECKING:
    from vllm.config import VllmConfig
else:
    VllmConfig = None

logger = init_logger(__name__)


class NeuronFramework(enum.Enum):
    TRANSFORMERS_NEURONX = "transformers-neuronx"
    NEURONX_DISTRIBUTED_INFERENCE = "neuronx-distributed-inference"


class NeuronPlatform(Platform):
    _enum = PlatformEnum.NEURON
    device_name: str = "neuron"
    device_type: str = "neuron"
    ray_device_key: str = "neuron_cores"
    supported_quantization: list[str] = ["neuron_quant", "fbgemm_fp8"]
    dist_backend: str = "gloo"
    device_control_env_var: str = "NEURON_RT_VISIBLE_CORES"

    @classmethod
    def get_device_name(cls, device_id: int = 0) -> str:
        return "neuron"

    @classmethod
    def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool:
        return False

    @classmethod
    def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
        parallel_config = vllm_config.parallel_config
        if parallel_config.worker_cls == "auto":
            parallel_config.worker_cls = \
                "vllm.worker.neuron_worker.NeuronWorker"

        if parallel_config.world_size > 1:
            parallel_config.distributed_executor_backend = "uni"

        if vllm_config.cache_config and vllm_config.model_config:
            # Neuron needs block_size = max_model_len
            vllm_config.cache_config.block_size = \
                vllm_config.model_config.max_model_len  # type: ignore

        if vllm_config.model_config and vllm_config.model_config.use_mla:
            logger.info(
                "MLA is enabled on a non-GPU platform; forcing chunked "
                "prefill and prefix caching to be disabled.")
            vllm_config.scheduler_config.enable_chunked_prefill = False
            vllm_config.scheduler_config.chunked_prefill_enabled = False
            vllm_config.scheduler_config.max_num_batched_tokens = max(
                vllm_config.scheduler_config.max_model_len,
                DEFAULT_MAX_NUM_BATCHED_TOKENS)

    @classmethod
    def is_pin_memory_available(cls) -> bool:
        logger.warning("Pin memory is not supported on Neuron.")
        return False

    @classmethod
    def get_device_communicator_cls(cls) -> str:
        if envs.VLLM_USE_V1:
            return "vllm.distributed.device_communicators.neuron_communicator.NeuronCommunicator"  # noqa
        return Platform.get_device_communicator_cls()

    @classmethod
    def use_all_gather(cls) -> bool:
        return True

    @classmethod
    @lru_cache
    def is_neuronx_distributed_inference(cls) -> bool:
        try:
            import neuronx_distributed_inference
        except ImportError:
            neuronx_distributed_inference = None
        return neuronx_distributed_inference is not None

    @classmethod
    @lru_cache
    def is_transformers_neuronx(cls) -> bool:
        try:
            import transformers_neuronx
        except ImportError:
            transformers_neuronx = None
        return transformers_neuronx is not None

    def get_neuron_framework_to_use(self):
        """Return the specified framework if corresponding installations are
        available.

        If no framework is specified, use neuronx-distributed-inference by
        default.
        If that's unavailable, check and switch to transformers-neuronx.
        """
        if not self.is_neuron():
            raise AssertionError(
                f"Neuron Framework unavailable for platform: {self}")

        tnx_installed = self.is_transformers_neuronx()
        nxd_installed = self.is_neuronx_distributed_inference()

        specified_framework = os.environ.get("VLLM_NEURON_FRAMEWORK")
        tnx_framework = NeuronFramework.TRANSFORMERS_NEURONX.value
        nxd_framework = NeuronFramework.NEURONX_DISTRIBUTED_INFERENCE.value
        if specified_framework == tnx_framework and tnx_installed:
            return NeuronFramework.TRANSFORMERS_NEURONX

        if ((specified_framework == nxd_framework and nxd_installed)
                or (specified_framework is None and nxd_installed)):
            return NeuronFramework.NEURONX_DISTRIBUTED_INFERENCE

        if specified_framework is None and tnx_installed:
            return NeuronFramework.TRANSFORMERS_NEURONX

        return None

    def use_neuronx_distributed(self):
        """
        Return True if the framework determined in
        get_neuron_framework_to_use() is
        NeuronFramework.NEURONX_DISTRIBUTED_INFERENCE, False otherwise. This
        is used to select the Neuron model framework and framework-specific
        configuration to apply during model compilation.
        """
        nxd_framework = NeuronFramework.NEURONX_DISTRIBUTED_INFERENCE
        return self.get_neuron_framework_to_use() == nxd_framework

    def use_transformers_neuronx(self):
        """
        Return True if the framework determined in
        get_neuron_framework_to_use() is NeuronFramework.TRANSFORMERS_NEURONX,
        False otherwise. This is used to select the Neuron model framework and
        framework-specific configuration to apply during model compilation.
        """
        return (self.get_neuron_framework_to_use() ==
                NeuronFramework.TRANSFORMERS_NEURONX)