o
    )ij                    @   s   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZ d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' d dl(m)Z) d dl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z> d dl?m@Z@mAZA d dlBmCZC d dlDmEZEmFZF d dlGmHZH d dlImJZJ d dlKmLZM d dlNmOZO d dlPmQZQmRZR d dlSmTZTmUZU d dlVmWZWmXZX d dlYmZZZ d dl[m\Z\m]Z] d dl^m_Z_m`Z`maZa d dlbmcZc eOedZeG dd de@ZfdS )    N)AsyncGeneratorAsyncIterator)Sequence)CallableFinalOptionalUnion)Request)Message)TypeAdapter)ModelConfig)EngineClient)ChatTemplateContentFormatOptionConversationMessagerandom_tool_call_id)get_developer_message%get_stop_tokens_for_assistant_actions#get_streamable_parser_for_assistantget_system_messageparse_chat_inputparse_chat_outputrender_for_completion)RequestLogger)ChatCompletionLogProbChatCompletionLogProbsChatCompletionLogProbsContent"ChatCompletionNamedToolChoiceParamChatCompletionRequestChatCompletionResponseChatCompletionResponseChoice"ChatCompletionResponseStreamChoiceChatCompletionStreamResponseChatMessageDeltaFunctionCallDeltaMessageDeltaToolCallErrorResponseFunctionCallFunctionDefinitionPromptTokenUsageInfoRequestResponseMetadataToolCall	UsageInfo)OpenAIServingclamp_prompt_logprobs)OpenAIServingModels)
ToolParserToolParserManager)MistralToolCall)get_max_tokens)TokensPrompt)init_logger)CompletionOutputRequestOutput)ReasoningParserReasoningParserManager)BeamSearchParamsSamplingParams)Logprob)AnyTokenizerMistralTokenizer)maybe_serialize_tool_callstruncate_tool_call_idsvalidate_request_params)as_listc                !       sH  e Zd Zddddddddddedededed	ee d
ee de	de
dede
de
dee de
de
de
ddf  fddZ	dFdedee deeedf eef fddZdedefddZedGdedefd d!Zed"ed#edeee
f fd$d%Zd#ed&ee d"ed'e
deee e
f f
d(d)Zded*ee d+ed,ed-ee d.ed/e de
deedf fd0d1Z!ded*ee d+ed,ed-ee d.ed/e deeef fd2d3Z"d4e#ee$f d5ee d.ed6e
dee% f
d7d8Z&		dHd9e'e d5e'ee#ee$f   d.ed:ee d;ee
 de(fd<d=Z)defd>d?Z*d@ee dAe+de
fdBdCZ,defdDdEZ-  Z.S )IOpenAIServingChatF N)return_tokens_as_token_idsreasoning_parserenable_auto_tools#exclude_tools_when_tool_choice_nonetool_parserenable_prompt_tokens_detailsenable_force_include_usageenable_log_outputsengine_clientmodel_configmodelsresponse_rolerequest_loggerchat_templatechat_template_content_formatrE   rF   rG   rH   rI   rJ   rK   rL   returnc             
      s  t  j||||||d || _|| _|| _|| _|
| _| jr#td d | _	|	rMzt
|	| _	| j	d us6J W n tyL } z	td|	d|d }~ww d | _| jrz|dkrc|jdrctd t|| _W n ty } z	td| d	|d }~ww || _|| _|| _| j | _| jr| jj}|d
krdn|}td|| j |jjdk| _| jrd| jvrg | jd< | jd t  d| _ d | _!d| _"d | _#d S )N)rM   rN   rO   rQ   rE   rK   z"auto" tool choice has been enabled please note that while the parallel_tool_calls client option is preset for compatibility reasons, it will be ignored.zreasoning_parser=z has not been registeredZpythoniczmeta-llama/Llama-3.2z>Llama3.2 models may struggle to emit valid pythonic tool callsz7Error: --enable-auto-tool-choice requires tool_parser:'z' which has not been registeredautomodelz.Using default chat sampling params from %s: %sZgpt_ossZstop_token_idsF)$super__init__rP   rR   rS   rL   rG   loggerinforF   r9   Zget_reasoning_parser	Exception	TypeErrorrI   rV   
startswithwarningr1   Zget_tool_parserrH   rJ   rK   rN   Zget_diff_sampling_paramdefault_sampling_paramsZgeneration_configZ	hf_configZ
model_typeuse_harmonyextendr   supports_browsingZbrowser_toolsupports_code_interpreterZpython_tool)selfrM   rN   rO   rP   rQ   rR   rS   rE   rF   rG   rH   rI   rJ   rK   rL   esource	__class__ p/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/openai/serving_chat.pyrX   <   s   






zOpenAIServingChat.__init__requestraw_requestc                    s  |  |I dH }|durtd| |S | jjr| jjz| j|dd}| |j|}| j	|I dH }| j
}t|trIt| t| t| |jdkrc| jrU|dusct|tsc| jsc| dW S |jdu sp|jdkrs| jrsd}ndd	 |jD }| js| j|||j|jp| j| j|j|j||j|j||j|jd
I dH \}	}
}n|  |\}	}
}W n' t!t"t#t$j%fy } zt&d | | d|j' W  Y d}~S d}~ww d| (||j) }t*|d}|r||j+_,g }zut-|D ]n\}}| j.du ri | _.t/| j0|t1|d | j.d}|j2r|3|| j.}n
|4|| j5j6| j.}| j7||
| ||d |du r2dn| 8|j9I dH }t|t:rL| jj;||||d}n| jj<||||||j=d}|>| qW n t!yz } z| t?|W  Y d}~S d}~ww t1|dksJ |\}|j@r| jA|||||	||| jBdS z| C|||||	||I dH W S  t!y } z| t?|W  Y d}~S d}~ww )z
        Chat Completion API similar to OpenAI's API.

        See https://platform.openai.com/docs/api-reference/chat/create
        for the API specification. This API mimics the OpenAI
        Chat Completion API.
        NzError with model %sT)Zsupports_default_mm_lorasrU   zV"auto" tool choice requires --enable-auto-tool-choice and --tool-call-parser to be setnonec                 S   s   g | ]}|  qS ri   )
model_dump).0Ztoolri   ri   rj   
<listcomp>   s    z<OpenAIServingChat.create_chat_completion.<locals>.<listcomp>)
rR   rS   add_generation_promptcontinue_final_message
tool_dicts	documentschat_template_kwargsrI   truncate_prompt_tokensadd_special_tokensz$Error in preprocessing prompt inputs z	chatcmpl-)
request_idprompt_token_ids)max_model_lenrk   Zinput_lengthr_   )paramslora_request)promptry   r|   r}   )r}   trace_headerspriority   )rK   )DZ_check_modelrY   errorrM   ZerroredZ
dead_errorZ_maybe_get_adaptersZ_get_model_namerV   Zget_tokenizerrI   
isinstancer>   r?   r@   rA   tool_choicerG   r`   create_error_responsetoolsrH   Z_preprocess_chatmessagesrR   rS   rq   rr   rt   ru   rv   rw   _make_request_with_harmony
ValueErrorr\   RuntimeErrorjinja2ZTemplateError	exception	__cause__Z_base_request_idry   r*   staterequest_metadata	enumerater_   r3   r{   lenZuse_beam_searchZto_beam_search_paramsZto_sampling_paramsrN   Zlogits_processor_patternZ_log_inputsZ_get_trace_headersheadersr:   Zbeam_searchgenerater   appendstrstream chat_completion_stream_generatorrK   chat_completion_full_generator)rd   rk   rl   Zerror_check_retr}   
model_name	tokenizerrI   rs   conversationZrequest_promptsZengine_promptsre   ry   r   
generatorsiengine_promptZ
max_tokensZsampling_paramsr   	generatorresult_generatorri   ri   rj   create_chat_completion   s"  






"



	.
z(OpenAIServingChat.create_chat_completionc                 C   s   |j r| jS |jd d S )Nrole)rq   rP   r   rd   rk   ri   ri   rj   get_chat_request_roleK  s   z'OpenAIServingChat.get_chat_request_role{}sc                 C   s4   d}| D ]}||kr|d7 }q||kr|d8 }q|S )zS
        Calculate the current level of nested brackets in a given string.
        r   r   ri   )r   Zopeningclosinglevelcharri   ri   rj   _bracket_levelP  s   
z OpenAIServingChat._bracket_level
delta_textprevious_textc                 C   sz   t |}d\}}| D ]-}|dkr|d7 }|dk}n|dkr&|d8 }|dk}|dkr/||7 }q|dkr8 ||fS q||fS )N)rD   Fr   r   r   r   ,)rC   r   )r   r   Zbracket_levelZupdated_deltaZpassed_zerocri   ri   rj   _filter_delta_text]  s    
	

z$OpenAIServingChat._filter_delta_textcurrent_textfunction_name_returnedc                 C   s  |d u s|dkrd |fS zt |}W n t jjjy&   td d }Y nw |d u s6t|tr6t	|dks>d}d }||fS t
||\}}|d }	|s\d|	vsTd|	vr\d}d }||fS |std|}
|
rk|
d	nd}t
||\}}|rd|	vr|d
 }	d}ttt t|	d |dt	|d	 ddgd}||fS t
||\}}|dkrtttd |dt	|d	 dgd}||fS d }||fS )NrD   z(not enough tokens to parse into JSON yetr   Fr   name
parametersz.*"parameters":\s*(.*)r   Tr   	argumentsfunction)idr   indextype
tool_callsr   r   )partial_json_parserloadscore
exceptionsZMalformedJSONrY   debugr   listr   rC   r   researchgroupr$   r%   r   r#   )rd   r   r   r   r   objdelta_message_Zfinishes_previous_toolZcurrent_tool_callZparam_matchr   ri   ri   rj   $extract_tool_call_required_streamingx  s|   
8,

z6OpenAIServingChat.extract_tool_call_required_streamingr   ry   r   r   r   r   c	           E      C  s  t t }	d}
d}|jd u rdn|j}dg| }dg| }d}d }| jr/dd t|D }t|jtr;|jjj	}nd }| oD| 
|}dg| }dg| }|sT| jrdg g| }dg| }dg| }n
|jd	krld }nd }z
| jrw| |}W n) ty } ztd
 | t|}d| dV  dV  W Y d }~d S d }~ww z|r| jr| |g| }nd g| }W n) ty } ztd | t|}d| dV  dV  W Y d }~d S d }~ww |j}|r|jp|}|o|j}nd\}}z|2 z43 d H W } | jd urt| j}| jd ur|t| j7 }|r| j}| |}!t|D ]1}"t|"t|!ddd d d}#t||
|	|#g|d}$|rJt|d|d|$_|$j dd}d| dV  q'|j!rd}%|r|d|d v r||d "d|!kr||d d p{d}%|%rt|D ]0}"t|"t|%dd d d}#t||
|	|#g|d}$|rt|d|d|$_|$j dd}d| dV  qd}| j#D ]s}&|&j$}"||" }'||" rʐq|j%r|j&d ur|&j%d usJ d| j'|&j(|&j%||j&|j)d}(nd }(| jr||" })|&j(D ]}*|)*|* q|)j+dk}+|j,s|+sq|)j-pd},n|&j.},|,s)|&j(s)||" s)q|s0| jr^| js^|d us;J |d usBJ ||" }-||" }.|-|, }/|.rY|.t/|&j( }0nt/|&j(}0| jrs|+rlt|,d}1nt|,d}1n|r| jr||" s|0|.s|d usJ |1|-|/|,|.|0|&j(}1|0t/|&j(s| jr|0| jrd||"< |1r|1j2r|1j2}/d |1_2n4d}/n0| jr|-|, },d}/||" rt3t4|,d|"d}2nt3t5 dt4||,d |"d!}2d||"< t|2gd"}1n|jd	kr-|d us J ||" }-|-|, }/||" }3| jr|6|/|\}4}5n|/}5| j7|-|5|,|3d#\}1||"< |/||"< n|r| jr|'d us;J |d usBJ |d usIJ |d usPJ t/|&j(}6||" s|1|-|/|,|.|0|6}1| jr|0| jrd||"< |6}0|1r|1j2r|1j2}/d |1_2nd}/|0|6rd||"< |8|6}0|1r|1j2r|1j2}/d |1_2nPd}/nM|6}7||" sd||"< d}-g }.|/},|0}7|'j9|-|/|,|.|0|7|d$}1n-|r|'d usJ |'j9|-|/|,|.|0|&j(|d$}1n| jr|1|-|/|,|.|0|&j(}1nt|,d}1|s| jr|d us J |d usJ |/||"< |0||"< n|d usJ ||"  |,7  < ||"  t|&j(7  < |1d u r1q| j:re| j;red}8|1j2rC|1j2}8n|1j<rRd=d%d& |1j<D }8|8re| j;j>||8t/|&j(|&j?ddd' |&j?d u rtt|"|1|(d d}#nd}9|'rt|'j@dk}9|9rt|'j@d nd}:nd}:| A|1|&r|'rd};t|1j<d jt4rt|1j<d jjBtrt|1j<d jjB};tCjD|'j@|: "d(i dd)}<|'jE|: }=|;dkr|=d |;  }=|<F|=dd}>tt3|:t4|>djGdd*d+gd"}1t|"|1|(|9s|&j?nd,|&jHd-}#d||"< t||
|	|#g|d}$|r||" }?t||?||? d|$_|$j dd}d| dV  qq6 |retI|}?t||?||? d}@| jJrM|rMtK|d.|@_Lt||
|	g ||@d/}A|Aj ddd0}Bd|B dV  tI|}Ct||C||C d|_M| j:r| j;rt|D ]%}"|r|"t|k r||" nd1||"  d2}D| j;j>||Dd d3ddd' qW n& ty } ztd4 | t|}d| dV  W Y d }~nd }~ww dV  d S )5Nzchat.completion.chunkTr   r   Fc                 S   s   g | ]}t  qS ri   )r   )ro   r   ri   ri   rj   rp     s    zFOpenAIServingChat.chat_completion_stream_generator.<locals>.<listcomp>rD   required#Error in reasoning parser creation.zdata: z

zdata: [DONE]

Error in tool parser creation.)FF)r   content)r   deltalogprobsfinish_reason)r   objectcreatedchoicesrV   Zprompt_tokenscompletion_tokensZtotal_tokens)exclude_unsetr   r   r   r   Did not output logprobs)	token_idstop_logprobsr   num_output_top_logprobsreturn_as_token_idfinal)reasoning_content)r   r   r   r   )r   r   r   r   r   )r   r   r   r   )r   r   r   previous_token_idscurrent_token_idsdelta_token_idsrk   c                 s   s&    | ]}|j r|j jr|j jV  qd S N)r   r   )ro   Ztcri   ri   rj   	<genexpr>~  s    
zEOpenAIServingChat.chat_completion_stream_generator.<locals>.<genexpr>ry   outputsoutput_token_idsr   Zis_streamingr   r   ensure_ascii)exclude_none)r   r   r   )r   r   r   r   stop_reasonZcached_tokens)r   r   r   r   rV   usage)r   r   z<streaming_complete: z tokens>Zstreaming_completez*Error in chat completion stream generator.)Ninttimenr`   ranger   r   r   r   r   %_should_stream_with_auto_tool_parsingrF   r   rY   r   Zcreate_streaming_error_responser   rI   r[   stream_optionsinclude_usageZcontinuous_usage_statsrz   r   encoder_prompt_token_idsnum_cached_tokensr   r    r$   r!   r,   r   Zmodel_dump_jsonechogetr   r   r   r   _create_chat_logprobsr   rE   processZcurrent_channelinclude_reasoningZlast_content_deltatextrB   Zis_reasoning_endZ#extract_reasoning_content_streamingr   r%   r#   r   extract_reasoning_contentr   Zextract_content_idsZextract_tool_calls_streamingrL   rQ   r   joinlog_outputsr   Zprev_tool_call_arr,_should_check_for_unstreamed_tool_arg_tokensr   jsondumpsZstreamed_args_for_toolreplacern   r   sumrJ   r)   prompt_tokens_detailsfinal_usage_info)Erd   rk   r   ry   r   r   r   r   rK   created_timeZchunk_object_typeZfirst_iterationZnum_choicesZprevious_num_tokensZfinish_reason_sentnum_prompt_tokensr   Zharmony_parsersZtool_choice_function_nameZtool_choice_autor   Zprevious_textsZall_previous_token_idsZadded_content_delta_arrZreasoning_end_arrrF   re   dataZtool_parsersr   r   Zinclude_continuous_usageresr   r   choice_datachunklast_msg_contentoutputrI   r   Zharmony_parsertoken_idis_finalr   r   r   r   r   r   Zdelta_tool_callZfn_name_returnedr   r   r   r   Zdelta_contentauto_tools_calledr   Zlatest_delta_lenZexpected_callZactual_callZremaining_callr   Zfinal_usageZfinal_usage_chunkZfinal_usage_dataZnum_completion_tokens	full_textri   ri   rj   r     s  
















	
















	





     K



z2OpenAIServingChat.chat_completion_stream_generatorc           *         sP  t t }d }	z|2 z3 d H W }
|
}	q6 W n' tjy&   | d Y S  ty> } z| t|W  Y d }~S d }~ww |	d usEJ g }| |}|	jD ]}|j	}|j
}|j
ru|jd uru|d ushJ d| j|||j||jd}nd }| jrt|\}}}|jsd }|rtdt|||d}t|j|||rdn|jr|jnd|jd}|| qO| jrz| |}W n  ty } ztd	 | t|W  Y d }~  S d }~ww |j|j|d
\}}|jsd }nd }|j}d}| jr| jst |j!t"s|j!dkrt|||d}n|j!r6t#|j!t"u r6t |t$r t%nt& t||d t'|j!j(j)|ddgd}n|j!rj|j!dkrjt |t$rHt%nt& |d usQJ t*t+t, -|}t|d| fdd|D d}n|j!rt|j!dkr|t|||d}n|j.r|j!dks|j!d u r| jr| jrz| |}W n! ty } ztd | t|W  Y d }~  S d }~ww |j/|d ur|nd|d
}|j0}|j0rt|||j1|j2d}n%|}|j1rt3|j1dkr|j1}t|||d}nt4d t|||d}t|j|||rdn|jr|jnd|jd}|| qO|j5red}|rBd|d v rB|d 6d|krB|d d pAd}t |t+rRd7dd |D }|D ]}||j8j1p]d } | |j8_1qT|	j9d usmJ t3|	j9}!|	j:d ur|!t3|	j:7 }!t;dd |	jD }"t<|!|"|!|" d }#| j=r|	j>rt?|	j>d!|#_@|#|_AtB|||||#tC|	jD|	jEd"}$| jFr&| jGr&|D ]h}d}%|j8j1r|j8j1}%n7|j8j2rg }&|j8j2D ] }'tH|'j(d#rtH|'j(d$r|&|'j(j) d%|'j(jI d& qd'7|&}(d(|( d)}%|%r$d })|jt3|	jk r|	j|j j	})| jGjJ||%|)|jddd* q|$S )+NzClient disconnectedr   )r   r   r   r   r   zdTool call in Chat Completion API is not supported for gpt-oss yet. Please use Responses API instead.)r   r   r   r   stop)r   messager   r   r   r   )rk   Fr   rD   r   r   )r   r   r   r   c              	      s,   g | ]} t |jtj|jd dddqS )Fr   r   r  )r'   r   r  r  r   )ro   	tool_callZtool_call_classri   rj   rp     s    zDOpenAIServingChat.chat_completion_full_generator.<locals>.<listcomp>)r   r   r   r   rm   rU   r   r   z~Error in chat_completion_full_generator - cannot determine if tools should be extracted. Returning a standard chat completion.r   r   r   
c                 s   s    | ]}|d  V  qdS )r   Nri   )ro   msgri   ri   rj   r     s    zCOpenAIServingChat.chat_completion_full_generator.<locals>.<genexpr>c                 s   s    | ]}t |jV  qd S r   )r   r   )ro   r  ri   ri   rj   r     s    

r   r   )r   r   rV   r   r   prompt_logprobskv_transfer_paramsr   r   ()z, z[tool_calls: ]r   )Kr   r   asyncioCancelledErrorr   r   r   r   r   r   r   r   r   rE   r`   r   r   NotImplementedErrorr"   r   r   r   r   r   rF   r   rY   r   r   r   rG   rI   r   r   r   r   r>   r2   r+   r'   r   r   r   r   r(   Zvalidate_jsonr   Zextract_tool_callsZtools_calledr   r   r   r   r   r   r   r  rz   r   r  r,   rJ   r   r)   r  r  r   r.   r  r  rL   rQ   hasattrr   r  )*rd   rk   r   ry   r   r   r   r   r	  Z	final_resr  re   r   r   r  r   Zout_logprobsr   r   Zfinal_contentZis_tool_callr  r  rF   r   r  r   rI   Ztool_call_infoZret_contentr  choiceZfull_messager
  Znum_generated_tokensr   responseZoutput_textZtool_call_descriptionsr  Ztool_calls_strr   ri   r  rj   r   !  s  














	z0OpenAIServingChat.chat_completion_full_generatorr   r   should_return_as_token_idc                    s"    fddt | D S )Nc              
      s^   g | ]+\}}r-|k rt  j|d  |d d t|d  jdtjddddqS )r   r   )r       utf-8r  errors)tokenlogprobbytes)r   _get_decoded_tokenmaxr-  r   encode)ro   r   prd   r'  r,  r   r   ri   rj   rp   C  s     

z7OpenAIServingChat._get_top_logprobs.<locals>.<listcomp>)r   items)rd   r   r   r   r'  ri   r3  rj   _get_top_logprobs?  s   

z#OpenAIServingChat._get_top_logprobsr   r   r   c                 C   s   g }|dur|n| j }t|D ]_\}}	|| }
|
du s"|
|	du r?||	}|r.d|	 }|t|t|jdddd q|
|	 }|j}|t| 	||	||t
|jd|du r[dnt|jddd| |
|||d qt|d	S )
zCreate OpenAI-style logprobs.Nz	token_id:r)  r  r*  )r,  r.  r(  )r,  r-  r.  r   r   )rE   r   r   decoder   r   r   r1  Zdecoded_tokenr/  r0  r-  r5  r   )rd   r   r   r   r   r   Zlogprobs_contentr'  r   r  Zstep_top_logprobsr,  Z
step_tokenZstep_decodedri   ri   rj   r   Q  sT   	



z'OpenAIServingChat._create_chat_logprobsc                 C   s   |j o| jo| jo|jdv S )ae  
        Utility function to check if streamed tokens should go through the tool
        call parser that was configured.

        We only want to do this IF user-provided tools are set, a tool parser
        is configured, "auto" tool choice is enabled, and the request's tool
        choice field indicates that "auto" tool choice should be used.
        )rU   N)r   rI   rG   r   r   ri   ri   rj   r     s   
z7OpenAIServingChat._should_stream_with_auto_tool_parsingr   r  c                 C   sL   t |jduo$| jo$| jo$|o$|jo$|jd o$|jd jo$|jd jjduS )z
        Check to see if we should check for unstreamed tool arguments tokens.
        This is only applicable when auto tool parsing is enabled, the delta
        is a tool call with arguments.
        Nr   )boolr   rG   rI   r   r   r   )rd   r   r  ri   ri   rj   r    s    

z>OpenAIServingChat._should_check_for_unstreamed_tool_arg_tokensc                 C   s|   g }| j rJ | jrJ t|jd d d}|| t }|| |jD ]	}|t| q$t|}t	|d}||g|gfS )N)reasoning_effortZbrowser_descriptionZpython_description)rz   )
rb   rc   r   r8  r   r   r   r   r   EngineTokensPrompt)rd   rk   r   Zsys_msgZdev_msgZchat_msgrz   r   ri   ri   rj   r     s    





z,OpenAIServingChat._make_request_with_harmonyr   )r   r   )NN)/__name__
__module____qualname__r   r   r/   r   r   r   r   r7  rX   r   r	   r   r   r   r&   r   r   staticmethodr   r   tupler   r$   r   r   r7   r   r   r=   r*   r   r   dictr<   r   r5  GenericSequencer   r   r   r6   r  r   __classcell__ri   ri   rg   rj   rC   :   s4   	
a
 2

O	


    ^
	
   


0

rC   )gr!  r  r   collections.abcr   r   r   r@  typingr   r   r   r   r   r   regexr   Zfastapir	   Zopenai_harmonyr
   ZOpenAIMessageZpydanticr   Zvllm.configr   Zvllm.engine.protocolr   Zvllm.entrypoints.chat_utilsr   r   r   Zvllm.entrypoints.harmony_utilsr   r   r   r   r   r   r   Zvllm.entrypoints.loggerr   Z vllm.entrypoints.openai.protocolr   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   Z&vllm.entrypoints.openai.serving_enginer-   r.   Z&vllm.entrypoints.openai.serving_modelsr/   Z$vllm.entrypoints.openai.tool_parsersr0   r1   Z8vllm.entrypoints.openai.tool_parsers.mistral_tool_parserr2   Zvllm.entrypoints.utilsr3   Zvllm.inputs.datar4   r9  Zvllm.loggerr5   Zvllm.outputsr6   r7   Zvllm.reasoningr8   r9   Zvllm.sampling_paramsr:   r;   Zvllm.sequencer<   Z!vllm.transformers_utils.tokenizerr=   r>   Z"vllm.transformers_utils.tokenizersr?   r@   rA   Z
vllm.utilsrB   r:  rY   rC   ri   ri   ri   rj   <module>   sD   $X