o
    )i	                  	   @   s
  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlZd dlZd dlmZ d dlmZmZm Z  d dl!m"Z" d d	l#m$Z$ ej%d
krtd dlm&Z& nd dl#m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4 d dl5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZCmDZDmEZEmFZFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZOmPZPmQZQ d dlRmSZS d dlTmUZU d dlVmWZX d dlVmYZZ d dl[m\Z\ d dl]m^Z^ d dl_m`Z` d dlambZb d dlcmdZdmeZe d dlfmgZg d dlhmiZimjZj d dlkmlZlmmZm d dlnmoZompZpmqZq d dlrmsZsmtZt d d lumvZvmwZwmxZxmyZy e^ezZ{ee>e@eBeHe<eJeMf Z|ee:eAeLf Z}eeOeQf Z~ee|e}e~eIf Zee?e;eDePeNeGe=eKf ZG d!d" d"e&ZG d#d$ d$e&ZWeee eeeWf Zd%ed&e$e fd'd(Zd%ed&e$eW fd)d*Zed+ed,ZG d-d. d.eZG d/d0 d0eZG d1d2 d2eeeee Zee< ZG d3d4 d4eeC Ze  e  e  e  G d5d6 d6Zd7eemdf d&eemdf fd8d9ZdS ):    N)AsyncGeneratorIterableMappingSequence)ThreadPoolExecutor)
HTTPStatus)
	AnnotatedAnyCallableClassVarGenericOptionalTypeVarUnioncastoverload)Request)	BaseModel
ConfigDictField)Headers)TypeIs)      )	TypedDict)ModelConfig)EngineClient)ChatCompletionMessageParamChatTemplateContentFormatOptionConversationMessageapply_hf_chat_templateapply_mistral_chat_templateparse_chat_messages_futures$resolve_chat_template_content_format)ConversationContext)RequestLogger)ChatCompletionRequestChatCompletionResponseClassificationRequestClassificationResponseCompletionRequestCompletionResponseDetokenizeRequestEmbeddingChatRequestEmbeddingCompletionRequestEmbeddingRequestEmbeddingResponse	ErrorInfoErrorResponsePoolingResponseRerankRequestResponsesRequestScoreRequestScoreResponseTokenizeChatRequestTokenizeCompletionRequestTokenizeResponseTranscriptionRequestTranscriptionResponseTranslationRequest)OpenAIServingModels)
ToolParser)EmbedsPrompt)TokensPrompt)parse_and_batch_prompt)init_logger)LoRARequest)MultiModalDataDict)PoolingRequestOutputRequestOutput)PoolingParams)BeamSearchParamsSamplingParams)LogprobPromptLogprobs)contains_trace_headersextract_trace_headerslog_tracing_disabled_warning)AnyTokenizerMistralTokenizer)AsyncMicrobatchTokenizer
is_list_ofmerge_async_iteratorsrandom_uuidc                   @   s"   e Zd ZU eed< ee ed< dS )TextTokensPromptpromptprompt_token_idsN)__name__
__module____qualname__str__annotations__listint r`   r`   r/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/openai/serving_engine.pyrV   i   s   
 rV   c                   @   s   e Zd ZU ejed< dS )r@   prompt_embedsN)rY   rZ   r[   torchTensorr]   r`   r`   r`   ra   r@   n   s   
 r@   rW   returnc                 C   s   t | tod| v od| vS NrX   rb   
isinstancedictrW   r`   r`   ra   is_text_tokens_promptu      rk   c                 C   s   t | tod| vod| v S rf   rg   rj   r`   r`   ra   is_embeds_promptz   rl   rm   RequestT)boundc                   @   sP   e Zd ZU dZg Zeee  ed< g Z	ee
ee ee f  ed< eddZdS )RequestProcessingMixinzY
    Mixin for request processing,
    handling prompt preparation and engine input.
    request_promptsengine_promptsTarbitrary_types_allowedN)rY   rZ   r[   __doc__rq   r   r   RequestPromptr]   rr   r   r^   EngineTokensPromptEngineEmbedsPromptr   model_configr`   r`   r`   ra   rp      s   
 rp   c                   @   sb   e Zd ZU dZdZeeeee	e
ef f df  ed< eedZee	e
ef  ed< eddZdS )ResponseGenerationMixinz`
    Mixin for response generation,
    managing result generators and final batch results.
    Nresult_generatordefault_factoryfinal_res_batchTrs   )rY   rZ   r[   ru   r{   r   r   tupler_   r   rG   rF   r]   r   r^   r~   r   ry   r`   r`   r`   ra   rz      s   
 rz   c                   @   s   e Zd ZU eed< dZee ed< eed< eed< e	dd dZ
eed	< dZee ed
< dZee ed< dZeeee	ddf  ed< edddZdS )ServeContextrequestNraw_request
model_name
request_idc                   C   s   t t S N)r_   timer`   r`   r`   ra   <lambda>   s    zServeContext.<lambda>r|   created_timelora_request	tokenizer   getruncate_prompt_tokensr`   T)Zprotected_namespacesrt   )rY   rZ   r[   rn   r]   r   r   r   r\   r   r   r_   r   rD   r   rP   r   r   r   ry   r`   r`   r`   ra   r      s   
 
r   c                   @   s&   e Zd ZU dZee ed< eed< dS )EmbeddingServeContextNchat_templatechat_template_content_format)rY   rZ   r[   r   r   r\   r]   r   r`   r`   r`   ra   r      s   
 r   c                !       s  e Zd ZU dZee ed< ddddedede	de
e d	ed
ef fddZdefddZdede
e fddZdedeeef fddZdedeeef fddZdedeeeef df fddZdede
e fddZdedeeef fddZdede
e fddZdede
e fd d!Zd"ejfd#ed$ed%edefd&d'Z d"ejfd#ed$ed%edefd(d)Z!d*e"de
e fd+d,Z#d*e"de
e$ fd-d.Z%	dd*e"d/ede
e$ fd0d1Z&d*e"de'e fd2d3Z(d*e"d4e)d5ed6e
e*e+e,d7d8f  d9ede-fd:d;Z.d*e"d4e)d<e/e+ d6e
e*e+e,d=d8f  de-f
d>d?Z0d*e"d@e/e+ dAede-fdBdCZ1		Ddd*e"d4e)dEeee/e+ f d6e
e*e+e,d7d8f  d9ede-fdFdGZ2		Ddd*e"d4e)dHe3eee/e+ f  d6e
e*e+e,d7d8f  d9edee-df fdIdJZ4		Ddd*e"d4e)dKe
eee/e e/e+ e/e/e+  f  d6e
e*e+e,d7d8f  d9ede5e/e- e/e6 f fdLdMZ7e8	N	Ndd*ee9e:e;e<e=e>f d4e)dKeee/e e/e+ e/e/e+  f d6e
e*e+e,d7d8f  d9ede5e/e- e/e? f fdOdPZ@e8	N	Ndd*eAd4e)dKe
eee/e e/e+ e/e/e+  f  d6e
e*e+e,d7d8f  d9ede5e/ee-e6f  e/ee?eBf  f fdQdPZ@		Ddd*eCd4e)dKe
eee/e e/e+ e/e/e+  f  d6e
e*e+e,d7d8f  d9ede5ee/e- e/ee-e6f  f ee/e? e/ee?eBf  f f fdRdPZ@	D							dd*eeDeEf d4e)dSe/eF dTe
e dUeGdVedWedXe
e/eHeeIf   dYe
e/eHeef   dZe
eHeeIf  d[e
eJe)geKf  d6e
e*e+e,d=d8f  d9ede5e/eL eMeN e/e? f fd\d]ZO		^dd_ed`eNdae?dbePdceQdde
e$ dee+fdfdgZReS	ddhe
eeTe/eT f  d6e
e*e+e,d=d8f  de/e6 fdidjZUd_edkeNdle
eePeeVf  dde
e$ ddf
dmdnZWdoeXde
eYeef  fdpdqZZeS	ddre
e[ dse
e de
e fdtduZ\eS	ddve]dwe+d4e)dxedef
dydzZ^d{e
e defd|d}Z_		dd{e
e dde
e$ defd~dZ`  ZaS )OpenAIServingu   
    A short string prepended to every request’s ID (e.g. "embd", "classify")
    so you can easily tell “this ID came from Embedding vs Classification.”
    request_id_prefixF)return_tokens_as_token_idsenable_force_include_usageengine_clientry   modelsrequest_loggerr   r   c                   sL   t    || _|| _|j| _|| _|| _|| _|| _t	dd| _
i | _d S )Nr   )max_workers)super__init__r   ry   max_model_lenr   r   r   r   r   Z_tokenizer_executor_async_tokenizer_pool)selfr   ry   r   r   r   r   	__class__r`   ra   r      s   

zOpenAIServing.__init__re   c                 C   s*   | j |}|du rt|}|| j |< |S )zh
        Return (and cache) an `AsyncMicrobatchTokenizer` bound to the
        given tokenizer.
        N)r   getrR   )r   r   async_tokenizerr`   r`   ra   _get_async_tokenizer   s
   
z"OpenAIServing._get_async_tokenizerctxc                    s   dS )z
        Default preprocessing hook. Subclasses may override
        to prepare `ctx` (classification, embedding, etc.).
        Nr`   r   r   r`   r`   ra   _preprocess   s   zOpenAIServing._preprocessc                 C   s
   |  dS )z
        Default response builder. Subclass may override this method
        to return the appropriate response object.
        zunimplemented endpoint)create_error_responser   r`   r`   ra   _build_response   s   
zOpenAIServing._build_responsec                    s0   |  |}|2 z	3 d H W }|  S 6 | dS )Nz!No response yielded from pipeline)	_pipeliner   )r   r   Z
generationresponser`   r`   ra   handle  s   

zOpenAIServing.handleNc                 C  s   |  |jI dH  }r|V  | | }r|V  | |I dH }t|tr)|V  | |I dH }t|tr9|V  | |I dH }t|trI|V  | |V  dS )z;Execute the request processing pipeline yielding responses.N)	_check_modelr   _validate_requestr   rh   r2   _prepare_generators_collect_batchr   )r   r   errorZpreprocess_retZgenerators_retZcollect_retr`   r`   ra   r     s   


zOpenAIServing._pipelinec                 C   s8   t |jdd }|d ur|| jkr||_d S | dS d S )Nr   zetruncate_prompt_tokens value is greater than max_model_len. Please, select a smaller truncation size.)getattrr   r   r   r   )r   r   r   r`   r`   ra   r   %  s   
zOpenAIServing._validate_requestc                 C   s    t |jds| dS |j S )Nto_pooling_paramsz0Request type does not support pooling parameters)hasattrr   r   r   r   r`   r`   ra   _create_pooling_params3  s
   
z$OpenAIServing._create_pooling_paramsc           
         s4  g }z}|j du rdn	| |j jI dH }| |}t|tr"|W S |jdu r-| dW S t|jD ]F\}}|j	 d| }|j
du rK| d  W S | j||j
| ||jd ttttf |}| jj||||j|t|jddd}|| q2t| |_W dS  ty }	 z| t|	W  Y d}	~	S d}	~	ww )	z2Schedule the request and get the result generator.NEngine prompts not available-zRequest prompts not availableparamsr   priorityr   )r   trace_headersr   )r   _get_trace_headersheadersr   rh   r2   rr   r   	enumerater   rq   _log_inputsr   r   r   rw   rx   r   encoder   r   appendrT   r{   	Exceptionr\   )
r   r   
generatorsr   Zpooling_paramsiengine_promptZrequest_id_item	generatorer`   r`   ra   r   =  sX   





	
z!OpenAIServing._prepare_generatorsc              
      s   zF|j du r| dW S t|j }dg| }|jdu r"| dW S |j2 z3 dH W \}}|||< q%6 d|v r=| dW S dd |D |_W dS  ty` } z| t|W  Y d}~S d}~ww )z0Collect batch results from the result generator.Nr   zResult generator not availablez*Failed to generate results for all promptsc                 S   s   g | ]}|d ur|qS r   r`   ).0resr`   r`   ra   
<listcomp>  s    z0OpenAIServing._collect_batch.<locals>.<listcomp>)rr   r   lenr{   r~   r   r\   )r   r   Znum_promptsr~   r   r   r   r`   r`   ra   r   w  s6   




zOpenAIServing._collect_batchZBadRequestErrormessageerr_typestatus_codec                 C   s   t t|||jddS )N)r   typecode)r   )r2   r1   value)r   r   r   r   r`   r`   ra   r     s   
z#OpenAIServing.create_error_responsec                 C   s   t | j|||d }|S )Nr   r   r   )jsondumpsr   Z
model_dump)r   r   r   r   Zjson_strr`   r`   ra   create_streaming_error_response  s   z-OpenAIServing.create_streaming_error_responser   c                    s   d }|  |jrd S |j| jjv rd S tjr<|jr<| j|jI d H  }r<t|tr-d S t|t	r<|j
jtjjkr<|}|pK| jd|j ddtjdS )NThe model `` does not exist.ZNotFoundErrorr   )_is_model_supportedmodelr   lora_requestsenvsZ VLLM_ALLOW_RUNTIME_LORA_UPDATINGZresolve_lorarh   rD   r2   r   r   r   BAD_REQUESTr   r   	NOT_FOUND)r   r   Zerror_responseZload_resultr`   r`   ra   r     s&   

zOpenAIServing._check_modelc                 C   sN   |  |}t }| jj D ]}|j|v r|| qt|dkr%| S dS )z;Determine if there are any active default multimodal loras.r   N)	_get_message_typessetr   r   values	lora_nameaddr   pop)r   r   message_typesZdefault_mm_lorasZlorar`   r`   ra   _get_active_default_mm_loras  s   


z*OpenAIServing._get_active_default_mm_lorassupports_default_mm_lorasc                 C   sX   |j | jjv r| jj|j  S |r| |}|d ur|S | |j r#d S td|j  d)Nr   r   )r   r   r   r   r   
ValueError)r   r   r   Zdefault_mm_lorar`   r`   ra   _maybe_get_adapters  s   
z!OpenAIServing._maybe_get_adaptersc                 C   sr   t  }t|ds
|S |jD ])}t|tr6d|v r6t|d tr6|d D ]}d|v r5||d dd  q#q|S )zRetrieve the set of types from message content dicts up
        until `_`; we use this to match potential multimodal data
        with default per modality loras.
        messagescontentr   _r   )r   r   r   rh   ri   r^   r   split)r   r   r   r   Zcontent_dictr`   r`   ra   r     s   

z OpenAIServing._get_message_typesr   rW   r   r   add_special_tokensc           
         s   |  |}| jjd ur| jjddr| }|d u r&|||dI d H }n|dk r7|||d| jdI d H }n|||d|dI d H }|j}|}	| |||	S )NZdo_lower_caseF)r   r   T)r   Z
truncation
max_length)r   ry   Zencoder_configr   lowerr   	input_ids_validate_input)
r   r   r   rW   r   r   r   encodedr   
input_textr`   r`   ra   _normalize_prompt_text_to_input  s8   
z-OpenAIServing._normalize_prompt_text_to_input
prompt_idsr   c                    s`   |  |}|d u r|}n|dk r|| j d  }n|| d  }||I d H }| |||S Nr   )r   r   decoder   )r   r   r   r   r   r   r   r   r`   r`   ra   !_normalize_prompt_tokens_to_input*  s   
z/OpenAIServing._normalize_prompt_tokens_to_inputr   r   c                 C   s"  t |}t|tttttfr6|| jkr0tdtdi}|t	|d}t
d| j d| d| dt||dS t|tttfrDt||dS t|trP|jpN|j}nt|d	d }|| jkrgt
d| j d
| d|d ur|| | jkrt
d| d| j d| d| d| j d| dt||dS )NZscoreZclassificationzembedding generationz'This model's maximum context length is z  tokens. However, you requested z tokens in the input for z(. Please reduce the length of the input.rW   rX   
max_tokensz# tokens. However, your request has z> input tokens. Please reduce the length of the input messages.z6'max_tokens' or 'max_completion_tokens' is too large: z). This model's maximum context length is z tokens and your request has z input tokens (z > z - z).)r   rh   r-   r.   r6   r4   r(   r   r   r   r   rV   r9   r8   r,   r&   Zmax_completion_tokensr   r   )r   r   r   r   Z	token_num
operationsZ	operationr   r`   r`   ra   r   >  sx   



zOpenAIServing._validate_inputTprompt_inputc                    s6   | j |||g||d2 z	3 dH W }|  S 6 td)z
        A simpler implementation of
        [`_tokenize_prompt_input_or_inputs`][vllm.entrypoints.openai.serving_engine.OpenAIServing._tokenize_prompt_input_or_inputs]
        that assumes single input.
        r   r   Nz$No results yielded from tokenization)_tokenize_prompt_inputs_asyncr   )r   r   r   r   r   r   resultr`   r`   ra   _tokenize_prompt_input_async  s   z*OpenAIServing._tokenize_prompt_input_asyncprompt_inputsc                 C  sR   |D ]#}t |tr| j|||||dI dH V  q| j||||dI dH V  qdS )z
        A simpler implementation of
        [`_tokenize_prompt_input_or_inputs`][vllm.entrypoints.openai.serving_engine.OpenAIServing._tokenize_prompt_input_or_inputs]
        that assumes multiple inputs.
        )rW   r   r   N)r   r   )rh   r\   r   r   )r   r   r   r  r   r   textr`   r`   ra   r     s"   
z+OpenAIServing._tokenize_prompt_inputs_asyncinput_or_inputsc                    s   t t  }t t  }t|tr|jdur|| |j| |du s)|r-|dkr-g |fS t|}g }	|D ]%}
|
d du rJ| j	|||
d ||d}n| j
|||
d |d}|	| q5tj|	 I dH }|| ||fS )a   
        Tokenize/detokenize depending on the input format.

        According to `OpenAI API <https://platform.openai.com/docs/api-reference/embeddings/create>`_
        , each input can be a string or array of tokens. Note that each request
        can pass one or more inputs.
        N Z	is_tokensFr   r   r   )r^   r@   rV   rh   r*   rb   extend_load_prompt_embedsrB   r   r   r   asynciogather)r   r   r   r  r   r   Zinputs_embedsZinputs_textZbatch_inputstasksr   taskresultsr`   r`   ra   &_tokenize_prompt_input_or_inputs_async  sD   




z4OpenAIServing._tokenize_prompt_input_or_inputs_async.c                       d S r   r`   r   r   r   r  r   r   r`   r`   ra   _preprocess_completion     z$OpenAIServing._preprocess_completionc                    r  r   r`   r  r`   r`   ra   r    r  c                    s   t |ts|d u rtd| j|||||dI d H \}}dd |D }t|dr1|jd ur1|jnd }	|	r>|D ]}
|	|
d< q7t |tsK|d urK||fS dd |D }|	r]|D ]}|	|d< qV|| }|| }||fS )NzFPrompt embeds with non-completion requests is not currently supported.r   c                 S      g | ]	}t |d  dqS )rX   rX   )rw   )r   Zrequest_prompt_textr`   r`   ra   r   '      z8OpenAIServing._preprocess_completion.<locals>.<listcomp>
cache_saltc                 S   r  )rb   )rb   )rx   )r   Zrequest_prompt_embedsr`   r`   ra   r   >  r  )rh   r*   r   r  r   r  )r   r   r   r  r   r   Zrequest_prompts_textZrequest_prompts_embedsZengine_prompts_textr  Zprompt_textZengine_prompts_embedsZprompt_embedrq   rr   r`   r`   ra   r    sT   
	

	
r   r   r   add_generation_promptcontinue_final_message
tool_dicts	documentschat_template_kwargstool_parserc                    s  | j }t|||||d}t||||d\}}t|||||	d}||
p%i  |d u r.d}nt|tr>t|fd|i|}ntd|||d|}|I d H }|d uo[t	|do[|j
dk}|rqt|tsid	}t|||j|d
}|d u rt|ts~J dt|dgd}n%t|tr| j|||||dI d H }nt|tsJ dt|||d}t|d d}|d ur||d< |jd ur|j|d< t	|dr|jd ur|j|d< ||g|gfS )N)ry   )Zcontent_format)r   r  r  Ztoolsr  placeholderr   )r   conversationry   tool_choicenonez5Tool usage is only supported for Chat Completions API)r   )zPrompt has to be a stringz%when the tokenizer is not initialisedr   r   r   z7Prompt has to be either a string or a list of token idsrX   r  Zmulti_modal_datamm_processor_kwargsr  r`   )ry   r#   r"   ri   updaterh   rQ   r!   r    r   r  r&   NotImplementedErrorZadjust_requestr\   rV   r   rS   r_   r   rw   r   r  )r   r   r   r   r   r   r  r  r  r  r  r  r   r   ry   Zresolved_content_formatr  Zmm_data_futureZ_chat_template_kwargsrequest_promptZmm_dataZshould_parse_toolsmsgr  r   r`   r`   ra   _preprocess_chatK  s   





	


zOpenAIServing._preprocess_chatr   r   r#  r   sampling_paramscontextr   r   c                 K  s   |}		 | j ||||d | jj|||f||d|}
|
2 z3 d H W }|| |V  q6 | s4d S | I d H }|| | }t|d}|}| jt	| |_
|	d }q)NTr   )r   r   r  r   )r   r   generateZappend_outputZneed_builtin_tool_callZ	call_toolZrender_for_completionrw   r   r   r   )r   r   r#  r   r&  r'  r   r   kwargsZorig_priorityr   r   Ztool_outputrX   r`   r`   ra   _generate_with_builtin_tools  sJ   

z*OpenAIServing._generate_with_builtin_toolsrb   c                    sD   dt dtffdd | r t| tr fdd| D S  | gS g S )Nembedre   c                    s   t jttj| dddt dd}t|t jr$|j	t j
t jt jfv s&J | }| dkr=|d}| dks=J  d urH|  d  }d|iS )NT)validatecpu)Zweights_onlyZmap_location   r   rb   )rc   loadioBytesIOpybase64	b64decodeZdevicerh   rd   ZdtypeZfloat32Zbfloat16Zfloat16Zto_densedimZsqueeze)r+  Ztensorr  r`   ra   _load_and_validate_embed  s$   

zCOpenAIServing._load_prompt_embeds.<locals>._load_and_validate_embedc                    s   g | ]} |qS r`   r`   )r   r+  )r5  r`   ra   r     s    z5OpenAIServing._load_prompt_embeds.<locals>.<listcomp>)bytesr@   rh   r^   )rb   r   r`   )r5  r   ra   r    s   


z!OpenAIServing._load_prompt_embedsinputsr   c                 C   sx   | j d u rd S d\}}}t|tr|}nt|tr|}nd|v r&|d}n|d }|d }| j j||||||d d S )N)NNNrb   rW   rX   r   )r   rh   r\   r^   r   Z
log_inputs)r   r   r7  r   r   rW   rX   rb   r`   r`   ra   r     s&   




zOpenAIServing._log_inputsr   c                    s0   | j  I d H }|rt|S t|rt  d S r   )r   is_tracing_enabledrN   rM   rO   )r   r   r8  r`   r`   ra   r   1  s   z OpenAIServing._get_trace_headersr   defaultc                 C   s$   |pt  }| du r|S | jd|S )z6Pulls the request id to use from a header, if providedNzX-Request-Id)rU   r   r   )r   r9  r`   r`   ra   _base_request_id?  s   
zOpenAIServing._base_request_idlogprobtoken_idreturn_as_token_idc                 C   s(   |rd| S | j d ur| j S ||S )Nz	token_id:)Zdecoded_tokenr   )r;  r<  r   r=  r`   r`   ra   _get_decoded_tokenI  s
   


z OpenAIServing._get_decoded_tokenr   c                 C   s   |sdS | j |S NT)r   Zis_base_model)r   r   r`   r`   ra   r   U  s   z!OpenAIServing._is_model_supportedc                 C   s    |r|j S |s| jjd jS |S r   )r   r   Zbase_model_pathsname)r   r   r   r`   r`   ra   _get_model_nameZ  s
   zOpenAIServing._get_model_name)Fr?  )..)TFNNNNNFr   r   )NN)brY   rZ   r[   r   r   r\   r]   r   r   r>   r   r%   boolr   rR   r   r   r2   r   r   AnyResponser   r   r   r   r   rH   r   r   r   r   r   r   r   
AnyRequestr   rD   r   r   r   r   rP   r   r_   r   rV   r   r^   r   r   r   r   r   r   r@   r  r   r,   r.   r4   r(   r6   r9   rw   r  r*   rx   CompletionLikeRequestChatLikeRequestr5   r   r   ri   r	   r
   r?   r   r   rv   r%  rJ   r$   r*  staticmethodr6  r  rI   r   r   r   r   r   r:  rK   r>  r   rA  __classcell__r`   r`   r   ra   r      s  
 











:
&





%

F


$

>	


F
	

v
7!

	r   prompt_logprobsc                 C   sF   | d u r| S | D ]}|d u rq|  D ]}|jtdkrd|_qq| S )Nz-infg    )r   r;  float)rI  Zlogprob_dictZlogprob_valuesr`   r`   ra   clamp_prompt_logprobsd  s   rK  )r  r0  r   sysr   collections.abcr   r   r   r   concurrent.futuresr   httpr   typingr   r	   r
   r   r   r   r   r   r   r   r2  rc   Zfastapir   Zpydanticr   r   r   Zstarlette.datastructuresr   Ztyping_extensionsr   version_infor   Z	vllm.envsr   Zvllm.configr   Zvllm.engine.protocolr   Zvllm.entrypoints.chat_utilsr   r   r   r    r!   r"   r#   Zvllm.entrypoints.contextr$   Zvllm.entrypoints.loggerr%   Z vllm.entrypoints.openai.protocolr&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   Z&vllm.entrypoints.openai.serving_modelsr>   Z$vllm.entrypoints.openai.tool_parsersr?   Zvllm.inputs.datar@   rx   rA   rw   Zvllm.inputs.parserB   Zvllm.loggerrC   Zvllm.lora.requestrD   Zvllm.multimodalrE   Zvllm.outputsrF   rG   Zvllm.pooling_paramsrH   Zvllm.sampling_paramsrI   rJ   Zvllm.sequencerK   rL   Zvllm.tracingrM   rN   rO   Z!vllm.transformers_utils.tokenizerrP   rQ   Z
vllm.utilsrR   rS   rT   rU   rY   loggerrE  rF  ZSpeechToTextRequestrD  rC  rV   r^   r_   r\   rv   rk   rm   rn   rp   rz   r   ZClassificationServeContextr   Zmodel_rebuildr   rK  r`   r`   r`   ra   <module>   s   0
$h       *
