o
    )iV#                     @   sJ  d dl Z d dlZd dlmZ d dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'm(Z( ee)Z*G dd de	Z+de,e	 fddZ-de j.fddZ/de j.fddZ0	d	dddZ1dS )    N)Optional)CLISubcommand)
run_serverrun_server_workersetup_server)make_arg_parservalidate_parsed_serve_args)VLLM_SUBCMD_PARSER_EPILOG)show_filtered_argument_or_group_from_help)init_logger)UsageContext)FlexibleArgumentParserdecorate_logsget_tcp_uriset_process_title)EngineCoreProc)CoreEngineProcManagerlaunch_core_engines)Executor)setup_multiprocess_prometheus)APIServerProcessManagerwait_for_completion_or_failurec                   @   sT   e Zd ZdZdZedejddfddZdejddfdd	Z	d
ej
defddZdS )ServeSubcommandz)The `serve` subcommand for the vLLM CLI. serveargsreturnNc                 C   s`   t | dr| jd ur| j| _| js| jdk rt|  d S | jdkr't|  d S tt	|  d S )N	model_tag   )
hasattrr   modelheadlessapi_server_countrun_headlessrun_multi_api_serveruvlooprunr   )r    r&   f/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/cli/serve.pycmd%   s   
zServeSubcommand.cmdc                 C   s   t | d S N)r   )selfr   r&   r&   r'   validate4   s   zServeSubcommand.validate
subparsersc                 C   s0   |j ddddd}t|}t|dg t|_|S )Nr   z,Start the vLLM OpenAI Compatible API server.z vllm serve [model_tag] [options])helpdescriptionusage)
add_parserr   r
   r	   epilog)r*   r,   Zserve_parserr&   r&   r'   subparser_init7   s   zServeSubcommand.subparser_init)__name__
__module____qualname____doc__namestaticmethodargparse	Namespacer(   r+   _SubParsersActionr   r2   r&   r&   r&   r'   r   !   s    r   r   c                   C   s   t  gS r)   )r   r&   r&   r&   r'   cmd_initF   s   r<   r   c                 C   s  | j dkr	tdtj| }tj}|j|dd}tj	s td|j
r'td|j}|j}|dkr5td|j}|j}t||}d	d
 }	ttj|	 ttj|	 td|| ttj||jjd|d|t||j d	}
z|
  W td |
  d S td |
  w )Nr   z.api_server_count can't be set in headless modeT)usage_contextr    z&Headless mode is only supported for V1z:data_parallel_hybrid_lb is not applicable in headless moder   z5data_parallel_size_local must be > 0 in headless modec                 S   s   t d|  t)NzReceived %d signal.)loggerdebug
SystemExit)signumframer&   r&   r'   signal_handlerh   s   z$run_headless.<locals>.signal_handlerzQLaunching %d data parallel engine(s) in headless mode, with head node address %s.F)	Z	target_fnlocal_engine_countstart_indexZlocal_start_indexvllm_configZlocal_clienthandshake_addressexecutor_class	log_statszShutting down.)r!   
ValueErrorvllmAsyncEngineArgsfrom_cli_argsr   OPENAI_API_SERVERcreate_engine_configenvsVLLM_USE_V1data_parallel_hybrid_lbparallel_configZdata_parallel_size_localZdata_parallel_master_ipZdata_parallel_rpc_portr   signalSIGTERMSIGINTr>   infor   r   Zrun_engine_coredata_parallel_rankr   	get_classdisable_log_statsZ
join_firstclose)r   engine_argsr=   rF   rS   rD   hostportrG   rC   engine_managerr&   r&   r'   r"   J   sT   





r"   c                 C   s  | j rJ | j}|dksJ | j}|dkrt  d| _t| \}}tj| }tj	}|j
|d}|j}|dkrQtjs>tdtjrEtd|jrQ|dkrQtd t|}	|j }
|j}|j}|j}|j}|sp|sp|dkspJ d }t||	|
|.\}}}tt||| ||j|j|r| nd d}|dks|s|st d
i |}W d    n1 sw   Y  |d u r|j!|d< t d
i |}t"|||d	 d S )Nr   r   )r=   z-api_server_count > 1 is only supported for V1zIVLLM_ALLOW_RUNTIME_LORA_UPDATING cannot be used with api_server_count > 1zaMulti-modal processor cache is disabled because it is not compatible with `api_server_count > 1`.)Ztarget_server_fnlisten_addresssockr   Znum_serversZinput_addressesZoutput_addressesstats_update_addressrb   )api_server_managerr_   coordinatorr&   )#r    r!   Zmm_processor_cache_gbr   r   rK   rL   rM   r   rN   rO   model_configrP   rQ   rJ   Z VLLM_ALLOW_RUNTIME_LORA_UPDATINGZis_multimodal_modelr>   warningr   rY   rZ   rS   rX   Zdata_parallel_external_lbrR   r   dictrun_api_server_worker_procZinputsZoutputsZget_stats_publish_addressr   Zfrontend_stats_publish_addressr   )r   Znum_api_serversZorig_mm_processor_cache_gbr`   ra   r\   r=   rF   re   rH   rI   rS   Zdp_rankZexternal_dp_lbZhybrid_dp_lbrc   Zlocal_engine_managerrd   	addressesZapi_server_manager_kwargsr&   r&   r'   r#      sx   




r#   c                 K   sH   |r| ddnd}tdt| t  tt| |||fi | dS )z6Entrypoint for individual API server worker processes.Zclient_indexr   Z	APIServerN)getr   strr   r$   r%   r   )r`   ra   r   Zclient_configZuvicorn_kwargsZserver_indexr&   r&   r'   rh      s   rh   r)   )r   N)2r9   rT   typingr   r$   rK   Z	vllm.envsrP   Zvllm.entrypoints.cli.typesr   Z"vllm.entrypoints.openai.api_serverr   r   r   Z vllm.entrypoints.openai.cli_argsr   r   Zvllm.entrypoints.utilsr	   r
   Zvllm.loggerr   Zvllm.usage.usage_libr   Z
vllm.utilsr   r   r   r   Zvllm.v1.engine.corer   Zvllm.v1.engine.utilsr   r   Zvllm.v1.executor.abstractr   Zvllm.v1.metrics.prometheusr   Zvllm.v1.utilsr   r   r3   r>   r   listr<   r:   r"   r#   rh   r&   r&   r&   r'   <module>   s6   %=V