o
    )i2                     @   s  d dl mZ d dlmZ d dlmZmZmZmZ d dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z;m<Z< d dl=m>Z> ee?Z@e	dedZAG dd dZBdS )     )Mapping)copy)AnyCallableOptionalUnion)TypeVarN)ParallelConfig
VllmConfig)1stateless_destroy_torch_distributed_process_group)
EngineArgs)
PromptType)init_logger)LoRARequest)MULTIMODAL_REGISTRYMultiModalRegistry)PoolingRequestOutputRequestOutput)PoolingParams)SamplingParams)SupportedTask)TokenizerGroupinit_tokenizer_from_configs)UsageContext)Device)EngineCoreClient)OutputProcessor)ParentRequest)	Processor)Executor)PrometheusStatLoggerStatLoggerBaseStatLoggerFactory)Metricget_metrics_snapshot)IterationStats_R)defaultc                   @   s  e Zd ZdZejdeddfdedee	 de
dedeee  d	ed
e
de
ddfddZeejddfdededeee  de
dd f
ddZeejddfdededeee  de
dd f
ddZdefddZde
fddZde
de
fddZedd Zdeedf fd d!Zd"ee ddfd#d$Z					%d^d&ed'ed(ee e!f d)ee" d*ee# d+ee$ee%f  d,ee&eef  d-eddfd.d/Z'deee( ee) f fd0d1Z*d2d3 Z+d4d5 Z,d6d7 Z-d8d9 Z.d:d; Z/d_d<ee0 fd=d>Z1d`d@efdAdBZ2d_dCeee  fdDdEZ3de
fdFdGZ4dee5 fdHdIZ6de7fdJdKZ8d*e#de
fdLdMZ9dNede
fdOdPZ:de;e fdQdRZ<dNede
fdSdTZ=		U	dadVeee>de?f f dWee" dXedYee$ee%f  dee? f
dZd[Z@d\d] ZAdS )b	LLMEnginez-Legacy LLMEngine for backwards compatibility.NFvllm_configexecutor_class	log_statsusage_contextstat_loggersmm_registryuse_cached_outputsmultiprocess_modereturnc	           
      C   s   t jstd|d urtd|| _|j| _|j| _|| _d | _| jr(t	|| _|j
}	|s8|	jdkr8|	 | _nd | _d| _| jjrFd | _nt|j|j|jd| _t|| j|d| _t| j| jd| _tj|d||| jd| _|sw| jjj| _|   d S )	NzUsing V1 LLMEngine, but envs.VLLM_USE_V1=False. This should not happen. As a workaround, try using LLMEngine.from_vllm_config(...) or explicitly set VLLM_USE_V1=0 or 1 and report this issue on Github.zlPassing StatLoggers to LLMEngine in V1 is not yet supported. Set VLLM_USE_V1=0 and file and issue on Github.   F)model_configscheduler_configlora_config)r)   	tokenizerr.   )r+   )r0   Zasyncio_moder)   r*   r+   )envsZVLLM_USE_V1
ValueErrorNotImplementedErrorr)   r3   Zcache_configr+   stat_loggerr    parallel_configZdata_parallel_sizeZstateless_init_dp_groupdp_groupshould_execute_dummy_batchZskip_tokenizer_initr6   r   r4   r5   r   	processorr   output_processorr   Zmake_clientengine_coreZmodel_executorreset_mm_cache)
selfr)   r*   r+   r,   r-   r.   r/   r0   r;    rC   e/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/v1/engine/llm_engine.py__init__,   sX   
zLLMEngine.__init__disable_log_statsc                 C   s   | |t || ||tjdS )Nr)   r*   r+   r,   r-   r0   )r   	get_classr7   VLLM_ENABLE_V1_MULTIPROCESSING)clsr)   r,   r-   rF   rC   rC   rD   from_vllm_configw   s   zLLMEngine.from_vllm_configengine_argsenable_multiprocessingc                 C   s@   | |}t|}tjrtd d}| |||j |||dS )z0Creates an LLM engine from the engine arguments.z'Enabling multiprocessing for LLMEngine.TrG   )Zcreate_engine_configr   rH   r7   rI   loggerdebugrF   )rJ   rL   r,   r-   rM   r)   r*   rC   rC   rD   from_engine_args   s   


zLLMEngine.from_engine_argsc                 C   
   | j  S N)r?   get_num_unfinished_requestsrB   rC   rC   rD   rS         
z%LLMEngine.get_num_unfinished_requestsc                 C   s,   | j  }| jd u r|p| j S | |S rR   )r?   has_unfinished_requestsr<   r@   Zdp_engines_runninghas_unfinished_requests_dp)rB   has_unfinishedrC   rC   rD   rV      s   


z!LLMEngine.has_unfinished_requestsrX   c                 C   s    t | j|}|s|rd| _|S NT)r	   Zhas_unfinished_dpr<   r=   )rB   rX   Zaggregated_has_unfinishedrC   rC   rD   rW      s   z$LLMEngine.has_unfinished_requests_dpc                 C   s   |S rR   rC   )rJ   outputsoutput_typerC   rC   rD   validate_outputs   s   zLLMEngine.validate_outputs.c                 C   rQ   rR   )r@   get_supported_tasksrT   rC   rC   rD   r]      rU   zLLMEngine.get_supported_tasksrequest_idsc                 C   s   | j |}| j| dS )z3Remove request_ids from EngineCore and Detokenizer.N)r?   abort_requestsr@   )rB   r^   rC   rC   rD   abort_request   s   zLLMEngine.abort_requestr   
request_idpromptparamsarrival_timelora_requesttokenization_kwargstrace_headerspriorityc	              
   C   s   t |tstdt| | j||||||||\}	}
t |tr%|jnd}|dkr<| j	|
|	d d | j
	|
 d S t||}t|D ]*}||\}}||d krV|
nt|
}||_||_| j	||	|| | j
	| qEd S )Nz!request_id must be a string, got r2   r   )
isinstancestr	TypeErrortyper>   Zprocess_inputsr   nr?   add_requestr@   r   rangeZget_child_infor   ra   Zsampling_params)rB   ra   rb   rc   rd   re   rf   rg   rh   Z
prompt_strrequestrm   Z
parent_reqidxZchild_requestrC   rC   rD   rn      s0   



zLLMEngine.add_requestc                 C   s   | j rd| _ | j  g S | j }| jrt nd }| jj|j|j	|d}| j
|j | jd urA|jd us8J | jj|j|d |jS )NF)Zengine_core_timestampiteration_stats)scheduler_statsrr   )r=   r@   Zexecute_dummy_batchZ
get_outputr+   r%   r?   Zprocess_outputsrZ   	timestampr_   Zreqs_to_abortr:   rs   recordZrequest_outputs)rB   rZ   rr   Zprocessed_outputsrC   rC   rD   step   s$   



zLLMEngine.stepc                 C      | j S rR   )r)   rT   rC   rC   rD   get_vllm_config     zLLMEngine.get_vllm_configc                 C   rw   rR   )r3   rT   rC   rC   rD   get_model_config  ry   zLLMEngine.get_model_configc                 C      | j d d S rY   r@   ZprofilerT   rC   rC   rD   start_profile     zLLMEngine.start_profilec                 C   r{   )NFr|   rT   rC   rC   rD   stop_profile  r~   zLLMEngine.stop_profilec                 C   s*   | j j| j | j j  | j  d S rR   )r>   r.   Zreset_processor_cacher3   Zmm_input_cache_clientresetr@   rA   rT   rC   rC   rD   rA     s   zLLMEngine.reset_mm_cachedevicec                 C   s   | j   d S rR   )r@   reset_prefix_cache)rB   r   rC   rC   rD   r     s   zLLMEngine.reset_prefix_cacher2   levelc                 C      | j | d S rR   )r@   sleep)rB   r   rC   rC   rD   r     r~   zLLMEngine.sleeptagsc                 C   r   rR   )r@   wake_up)rB   r   rC   rC   rD   r     r~   zLLMEngine.wake_upc                 C   rQ   rR   )r@   is_sleepingrT   rC   rC   rD   r     rU   zLLMEngine.is_sleepingc                 C   s   | j sJ dt S )NzStat logging disabled)r+   r$   rT   rC   rC   rD   get_metrics"  s   zLLMEngine.get_metricsc                 C   s   | j d u r	td| j S )Nz;Unable to get tokenizer because skip_tokenizer_init is True)r6   r8   rT   rC   rC   rD   get_tokenizer_group&  s   
zLLMEngine.get_tokenizer_groupc                 C      | j |S )z<Load a new LoRA adapter into the engine for future requests.)r@   add_lora)rB   re   rC   rC   rD   r   -     zLLMEngine.add_loralora_idc                 C   r   )z&Remove an already loaded LoRA adapter.)r@   remove_lorarB   r   rC   rC   rD   r   1  r   zLLMEngine.remove_lorac                 C   rQ   )zList all registered adapters.)r@   
list_lorasrT   rC   rC   rD   r   5  s   
zLLMEngine.list_lorasc                 C   r   )z&Prevent an adapter from being evicted.)r@   pin_lorar   rC   rC   rD   r   9  r   zLLMEngine.pin_lorarC   methodtimeoutargskwargsc                 C   s   | j ||||S rR   )r@   collective_rpc)rB   r   r   r   r   rC   rC   rD   r   =  s   zLLMEngine.collective_rpcc                 C   s    t | dd  }rt| d S d S )Nr<   )getattrr   )rB   r<   rC   rC   rD   __del__D  s   zLLMEngine.__del__)NNNNr   rR   )r2   )NrC   N)B__name__
__module____qualname____doc__r   ZENGINE_CONTEXTr   r
   rl   r   boolr   listr"   r   rE   classmethodrK   r   rP   intrS   rV   rW   r\   tupler   r]   rj   r`   r   r   r   r   floatr   dictr   r   rn   r   r   rv   rx   rz   r}   r   rA   r   r   r   r   r   r#   r   r   r   r   r   setr   r   r   r&   r   r   rC   rC   rC   rD   r(   )   s    
	

K



	

,
r(   )Ccollections.abcr   r   typingr   r   r   r   Ztyping_extensionsr   Z	vllm.envsr7   Zvllm.configr	   r
   Zvllm.distributedr   Zvllm.engine.arg_utilsr   Zvllm.inputsr   Zvllm.loggerr   Zvllm.lora.requestr   Zvllm.multimodalr   r   Zvllm.outputsr   r   Zvllm.pooling_paramsr   Zvllm.sampling_paramsr   Z
vllm.tasksr   Z'vllm.transformers_utils.tokenizer_groupr   r   Zvllm.usage.usage_libr   Z
vllm.utilsr   Zvllm.v1.engine.core_clientr   Zvllm.v1.engine.output_processorr   Z vllm.v1.engine.parallel_samplingr   Zvllm.v1.engine.processorr   Zvllm.v1.executor.abstractr   Zvllm.v1.metrics.loggersr    r!   r"   Zvllm.v1.metrics.readerr#   r$   Zvllm.v1.metrics.statsr%   r   rN   r&   r(   rC   rC   rC   rD   <module>   s<   