o
    )i-                     @   s"  d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZmZ d dlZd dlm  mZ d dlmZ d dlmZ d d	lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z# d d
l$m%Z& d dl'm(Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z4m5Z5m6Z6m7Z7 d dl8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@ d dlAmBZB d dlCmDZDmEZEmFZFmGZGmHZHmIZImJZJ d dlKmLZL d dlMmNZN d dlOmPZPmQZQ d dlRmSZT d dlUmVZV d dlWmXZX d dlYmZZZm[Z[ d dl\m]Z] d dl^m_Z_ d dl`maZa eVebZcG dd deLZddS )     N)AsyncGeneratorAsyncIterator)AsyncExitStack)copy)
HTTPStatus)AnyCallableFinalOptionalUnion)Request)	BaseModel)
ResponseCreatedEventResponseFunctionToolCallResponseInProgressEventResponseOutputItemResponseOutputItemDoneEventResponseOutputMessageResponseOutputTextResponseReasoningItemResponseReasoningTextDeltaEventResponseReasoningTextDoneEvent)Content)Message)envs)ModelConfig)EngineClient)ChatCompletionMessageParamChatTemplateContentFormatOption)ConversationContextHarmonyContextSimpleContextStreamingHarmonyContext)get_developer_message%get_stop_tokens_for_assistant_actionsget_system_messageget_user_messageparse_output_messageparse_remaining_stateparse_response_inputrender_for_completion)RequestLogger)ErrorResponseInputTokensDetailsOutputTokensDetailsRequestResponseMetadataResponsesRequestResponsesResponseResponseUsage)OpenAIServing)OpenAIServingModels)MCPToolServer
ToolServer)TokensPrompt)init_logger)CompletionOutput)ReasoningParserReasoningParserManager)SamplingParams)AnyTokenizer)random_uuidc                       sN  e Zd Zddddddddddedededee d	ee d
e	de
dede
dee dee de
de
de
ddf fddZ	d@dedee deeedf eef fddZdedee defddZdedee fddZ	d@ded ed!ee d"ed#eded$ed%ee deeef fd&d'Zded(ededee fd)d*Z d"e!dee fd+d,Z"	d@dedee dee# fd-d.Z$dedee dee% fd/d0Z&defd1d2Z'd3edeeef fd4d5Z(d3edeeef fd6d7Z)d3edefd8d9Z*d3edefd:d;Z+defd<d=Z,	d@ded ed!eee  d"ed#eded$ed%ee deedf fd>d?Z-  Z.S )AOpenAIServingResponsesF N)return_tokens_as_token_idsreasoning_parserenable_auto_toolstool_parsertool_serverenable_prompt_tokens_detailsenable_force_include_usageenable_log_outputsengine_clientmodel_configmodelsrequest_loggerchat_templatechat_template_content_formatrA   rB   rC   rD   rE   rF   rG   rH   returnc             
      s\  t  j||||||d || _|| _|| _d | _|r?zt|| _| jd us(J W n ty> } z	t	d|d|d }~ww || _
|| _| j | _| jrb| jj}|dkrXdn|}td|| j tj| _| jrntd |jjdk| _| jrtd	 d
| jvrg | jd
< | jd
 t  |	| _| jrtd i | _t | _ i | _!i | _"|| _#d S )N)rI   rJ   rK   rL   rA   rG   zreasoning_parser=z has not been registeredautomodelz.Using default chat sampling params from %s: %szz`VLLM_ENABLE_RESPONSES_API_STORE` is enabled. This may cause a memory leak since we never remove responses from the store.Zgpt_osszLFor gpt-oss, we ignore --enable-auto-tool-choice and always enable tool use.Zstop_token_idsz"auto" tool choice has been enabled please note that while the parallel_tool_calls client option is preset for compatibility reasons, it will be ignored.)$super__init__rM   rN   rH   rB   r;   Zget_reasoning_parser	Exception	TypeErrorrF   rG   rJ   Zget_diff_sampling_paramdefault_sampling_paramsZgeneration_configloggerinfor   ZVLLM_ENABLE_RESPONSES_API_STOREenable_storeZwarning_onceZ	hf_configZ
model_typeuse_harmonywarningextendr$   rC   response_storeasyncioLockresponse_store_lock	msg_storebackground_tasksrE   )selfrI   rJ   rK   rL   rM   rN   rA   rB   rC   rD   rE   rF   rG   rH   esource	__class__ u/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/openai/serving_responses.pyrS   F   sz   	






zOpenAIServingResponses.__init__requestraw_requestc                    s   |I d H }|d urtd| |S jjrjj|jr2js2|jr/j	ddt
jdS d|_|j}|d urp|dsC|S j4 I d H  j|}W d   I d H  n1 I d H saw   Y  |d u ro|S nd }z1|}|j|}j|I d H }jr||\}	}
}n|||I d H \}	}
}W n( ttttjtfy } zt d 	| d|j! W  Y d }~S d }~ww t"|j#d	}|r||j$_%j&d urt'j&t(r|js|j)r|j*rt+d
d |j*D r	dS g }g }jr!j&d ur!j&,dr|-d j&,dr!|-d t. 4 I d H  zj&d urM fdd|D }i }|D ]}|| I d H ||< q?nt/|dksVJ i }t0|D ]R\}}j1t/|d  }|2|j3}|d u rwd n4|j5I d H }jr|j)rt6|	|}n	t7|	|}nt8 }j9|j#|
| |||||j:|d}|-| q\W n$ ty } z	t;|W  Y d }~W  d   I d H  S d }~ww t/|dksJ |\}|jr|	j<|j#< |jrXt=t>> }t?j@||||g dd d}j4 I d H  |j|jA< W d   I d H  n1 I d H s w   Y  tBjCD||||||||d|jA d}|jA|jE< |Ffdd |W  d   I d H  S |j)rqG|||||||W  d   I d H  S zH|||||||I d H W W  d   I d H  S  tIy } z	t;|W  Y d }~W  d   I d H  S d }~ww 1 I d H sw   Y  	dS )NzError with model %sinvalid_request_errorzThis vLLM engine does not support `store=True` and therefore does not support the background mode. To enable these features, set the environment variable `VLLM_ENABLE_RESPONSES_API_STORE=1` when launching the vLLM server.err_typemessagestatus_codeFresp_z$Error in preprocessing prompt inputs )
request_idc                 s   s    | ]}|j d v V  qdS ))web_search_previewcode_interpreterNtype.0Ztoolrh   rh   ri   	<genexpr>   s
    
z:OpenAIServingResponses.create_responses.<locals>.<genexpr>zFMCP tool server is not supported in background mode and streaming modebrowserpythonc                    s    i | ]}|  j|qS rh   )enter_async_contextrE   Znew_session)ry   	tool_name)
exit_stackrc   rh   ri   
<dictcomp>  s    
z;OpenAIServingResponses.create_responses.<locals>.<dictcomp>r   prompt_token_ids)rs   Zrequest_promptengine_promptsampling_paramscontextlora_requestprioritytrace_headers   queued
model_namecreated_timeoutputstatususageZcreate_)namec                    s   j  d S N)rb   pop)_)response_idrc   rh   ri   <lambda>^  s    z9OpenAIServingResponses.create_responses.<locals>.<lambda>zShould not reach here)JZ_check_modelrW   errorrI   ZerroredZ
dead_errorstorerY   
backgroundcreate_error_responser   BAD_REQUESTZprevious_response_id
startswith_make_invalid_id_errorr`   r]   get_make_not_found_errorZ_maybe_get_adaptersZ_get_model_namerQ   Zget_tokenizerrZ   _make_request_with_harmony_make_request
ValueErrorrU   RuntimeErrorjinja2ZTemplateErrorNotImplementedError	exception	__cause__r/   rs   staterequest_metadatarE   
isinstancer5   streamtoolsanyhas_toolappendr   len	enumerateZmax_model_lenZto_sampling_paramsrV   Z_get_trace_headersheadersr"   r    r!   Z_generate_with_builtin_toolsr   strra   inttimer1   from_requestidr^   create_task_run_background_requestrb   add_done_callbackresponses_stream_generatorresponses_full_generatorrT   )rc   rj   rk   Zerror_check_retZprev_response_idprev_responser   r   	tokenizermessagesrequest_promptsengine_promptsrd   r   
generatorsZbuiltin_tool_listZtool_session_ctxsZtool_sessionsr~   ir   Zdefault_max_tokensr   r   r   	generatorresult_generatorr   responsetaskrh   )r   r   rc   ri   create_responses   sz  

(




"



0	*


_j
tt
vz'OpenAIServingResponses.create_responsesr   r   c                    sR   t |jdkrtd| ||}| j|||| j| jdI d H \}}}|||fS )Nr   z:Tool use is not supported in Responses API without Harmony)rM   rN   )r   r   r   _construct_input_messagesZ_preprocess_chatrM   rN   )rc   rj   r   r   r   r   r   r   rh   rh   ri   r   z  s   
z$OpenAIServingResponses._make_requestc                 C   s>   |j dkr	td| ||}t|}t|d}||g|gfS )NrP   zAOnly 'auto' tool_choice is supported in response API with Harmony)r   )Ztool_choicer   &_construct_input_messages_with_harmonyr*   EngineTokensPrompt)rc   rj   r   r   r   r   rh   rh   ri   r     s   

z1OpenAIServingResponses._make_request_with_harmonyr   r   r   r   r   r   c	              
      s  |d u rt t }z|2 z3 d H W }	q6 W n' tjy&   | d Y S  ty> }
 z| t|
W  Y d }
~
S d }
~
ww | jr[t|t	sIJ | 
|}|j}|j}|j}|j}n;t|tsbJ |j}|d uskJ t|jdkstJ |jd }| |||}|jd usJ t|j}t|j}|j}d}t|||| t|dt|dd}tj|||||d|d}|jr| j4 I d H " | j|j}|d u s|j d	kr|| j|j< W d   I d H  |S 1 I d H sw   Y  |S )
NzClient disconnectedr   r   )Zcached_tokens)Zreasoning_tokens)Zinput_tokensZoutput_tokensZtotal_tokensZinput_tokens_detailsZoutput_tokens_details	completedr   	cancelled)!r   r   r^   CancelledErrorr   r   r   rZ   r   r    (_make_response_output_items_with_harmonynum_prompt_tokensZnum_output_tokensnum_cached_tokensnum_reasoning_tokensr!   Zlast_outputr   outputs_make_response_output_itemsr   	token_idsr2   r-   r.   r1   r   r   r`   r]   r   r   r   )rc   rj   r   r   r   r   r   r   r   r   rd   r   r   Znum_generated_tokensr   r   Z	final_resfinal_outputr   r   stored_responserh   rh   ri   r     s~   



	

z/OpenAIServingResponses.responses_full_generatorr   c              
   C   s  | j r)z|  |}W n ty } ztd |d }~ww |j|j|d\}}nd }|j}| jrT| jrTd}|r;|}n|rCd| d}|rT| jj|j	||j
|jddd g }	|rotdt  g d	t|d
dgd d}
|	|
 |rt|g dd d}tdt  |gdddd}|	| |	S )Nz#Error in reasoning parser creation.)rj   r@   z[reasoning: ]F)rs   r   Zoutput_token_idsfinish_reasonZis_streamingdeltaZrs_	reasoningreasoning_texttextrw   )r   summaryrw   contentr   output_text)r   annotationsrw   logprobsZmsg_	assistantr   ro   )r   r   roler   rw   )rB   r   rW   r   Zextract_reasoning_contentr   rH   rL   Zlog_outputsrs   r   r   r   r>   ResponseReasoningTextContentr   r   r   )rc   rj   r   r   rB   rd   Zreasoning_contentr   r   r   reasoning_itemro   rh   rh   ri   r     st   
	




z2OpenAIServingResponses._make_response_output_itemsc                 C   sH   g }|j }|j|d  D ]	}|t| qt|j}|r"|| |S r   )num_init_messagesr   r\   r'   r(   parser)rc   r   Zoutput_itemsr   msgZ
last_itemsrh   rh   ri   r   -  s   

z?OpenAIServingResponses._make_response_output_items_with_harmonyc                 C   s   g }|j r|d|j d |d ur7| j|j }|| |jD ]}t|tr6|jD ]}|d|j	d q*q t|j
trH|d|j
d |S ||j
 |S )Nsystem)r   r   r   user)instructionsr   ra   r   r\   r   r   r   r   r   inputr   )rc   rj   r   r   Zprev_msgZoutput_itemr   rh   rh   ri   r   ;  s,   




z0OpenAIServingResponses._construct_input_messagesc                 C   s  g }|d u rh|j r|j jnd }dd |jD }d|v o%| jd uo%| jd}d|v o4| jd uo4| jd}t||rD| jd urD| jdnd |rR| jd urR| jdnd d}|| t|j	|j}	||	 ni| j
|j }
t|
dkr|
d	 }t|tsJ |jd
krd	}tt|
d d	d	D ]}|
| }t|tsJ |jd
kr|} nq|
|d d  }|
|d d = |D ]}t|tsJ |jdkr|
| q||
 t|jtr|t|j |S |d urt|j}ng }|jD ]}|t|| t|tr|| q|S )Nc                 S   s   g | ]}|j qS rh   rv   rx   rh   rh   ri   
<listcomp>i  s    zQOpenAIServingResponses._construct_input_messages_with_harmony.<locals>.<listcomp>rt   r{   ru   r|   )reasoning_effortZbrowser_descriptionZpython_descriptionr   final   r   analysis)r   Zeffortr   rE   r   r%   Zget_tool_descriptionr   r#   r   ra   r   r   r   OpenAIHarmonyMessagechannelranger\   r   r   r&   r   r   r)   r   )rc   rj   r   r   r   Z
tool_typesZenable_browserZenable_code_interpreterZsys_msgZdev_msgZ	prev_msgsZlast_msgZprev_final_msg_idxr   Z
prev_msg_iZrecent_turn_msgsr   Zprev_outputsZresponse_msgrh   rh   ri   r   _  s   





	






z=OpenAIServingResponses._construct_input_messages_with_harmonyc              
      s   z| j |g|R i |I d H }W n  ty2 } ztd|j | t|}W Y d }~nd }~ww t|tro|j}| j	4 I d H   | j
|}|d usOJ |jdvrWd|_W d   I d H  d S 1 I d H shw   Y  d S d S )Nz Background request failed for %s)r   r   failed)r   rT   rW   r   rs   r   r   r   r,   r`   r]   r   r   )rc   rj   argskwargsr   rd   r   r   rh   rh   ri   r     s2   

.z.OpenAIServingResponses._run_background_requestr   c              	      sr   | ds| |S | j4 I d H  | j|}W d   I d H  n1 I d H s)w   Y  |d u r7| |S |S )Nrq   )r   r   r`   r]   r   r   )rc   r   r   rh   rh   ri   retrieve_responses  s   

(
z)OpenAIServingResponses.retrieve_responsesc              	      s  | ds| |S | j4 I d H ? | j|}|d u r,| |W  d   I d H  S |j}|dvrD| jdddW  d   I d H  S d|_W d   I d H  n1 I d H sWw   Y  | j| }r|	  z|I d H  W |S  t
jy   td| Y |S w |S )Nrq   )r   in_progressrl   z%Cannot cancel a synchronous response.rn   ro   r   z$Background task for %s was cancelled)r   r   r`   r]   r   r   r   r   rb   cancelr^   r   rW   r   )rc   r   r   Zprev_statusr   rh   rh   ri   cancel_responses  s:   

(z'OpenAIServingResponses.cancel_responsesc                 C   s   | j dd| ddS )Nrl   zInvalid 'response_id': 'z*'. Expected an ID that begins with 'resp'.r   )r   rc   r   rh   rh   ri   r     s   
z-OpenAIServingResponses._make_invalid_id_errorc                 C   s   | j dd| dtjdS )Nrl   zResponse with id 'z' not found.rm   )r   r   	NOT_FOUNDr   rh   rh   ri   r     s
   
z,OpenAIServingResponses._make_not_found_errorc                 C   s   | j ddtjdS )Nrl   z`store=True` (default) is not supported. Please set `store=False` in Responses API or set `VLLM_ENABLE_RESPONSES_API_STORE=1` in the env var when starting the vLLM server.rm   )r   r   r   )rc   rh   rh   ri   _make_store_not_supported_error  s
   z6OpenAIServingResponses._make_store_not_supported_errorc	                   s  t |ts
td|ptt }d dtf fdd}	d}
d}d}d}tj||||g dd d	 }|	t	d
d|dV  |	t
dd|dV  |2 z3 d H W }t |tsWJ | r|d7 }d}t|jjdkr|jjd }|jd uruny|jdkrtdt|jd jddgd|g d}|	td|d||
|jd jdV  |	tdd||dV  nE|jdkrtd|jd jg d}|	tjdd||
|jd jg |dV  |	tjdd|||
|dV  |	tjdd|t|d d!|gdd"dV  |jjr|jjdkrA|jjd u rA|s/d#}|	tjd$d|tj|d d!g dd"dV  |	tj d%d|||
tjddg g d&d'V  |	tj!d(d|
|||jjg d)V  nJ|jjdkr|jjd u r|s|d#}|	tjd$d|tjd|g dd*dV  |	tj d%d|||
tjddg g d&d'V  |	t"d+|||
|jjdd,V  |# rt|jjdkr|jjd }| j$d ur\| j$%d-r\|jd ur\|j&d.r\|jtd.d  }d }t'(|jd j}|d/krtj)j*d/|d0 d1}n4|d2krtj)j+d3d4|,d5d d6}n|d7kr
tj)j-d7|d8 d4|,d5d d9}nt.d:| |	tjd$d|tj)j/d;||dd<dV  |	tj0d=d||d>V  |	tj1d?d||d>V  |	tj2d@d||d>V  |	tjdd|tj/d;||dd<dV  | j$d ur| j$%dAr|jd ur|j&dAr|	tjd$d|tj3dB|ddCg ddDdV  |	tj4dEd||d>V  |	tj5dFd|||jd jdGV  |	tj6dHd||d>V  |	tj7dId||d>V  |	tjdd|tj3dB||jd jdCg ddDdV  qI6 dJdK }| j8||| |||||dLI d H }|	tj9dMd| dV  d S )NNz=Streaming is not supported for responses API without Harmony.r   eventc                    s>   t | dr | _ d7  t| dd}d| d| jd d dS )	Nsequence_numberr   rw   unknownzevent: z
data: )indentz

)hasattrr  getattrZmodel_dump_json)r  Z
event_typer  rh   ri   _send_event  s   

zFOpenAIServingResponses.responses_stream_generator.<locals>._send_eventr@   Fr   r   zresponse.createdr   )rw   r  r   zresponse.in_progressr   r   r   r   r   r   )rw   r   r   r   r   zresponse.reasoning_text.done)rw   item_idr  output_indexcontent_indexr   zresponse.output_item.done)rw   r  r  itemr   r   )rw   r   r   zresponse.output_text.done)rw   r  r  r  r   r   r
  zresponse.content_part.done)rw   r  r
  r  r  partro   r   )r   rw   r   r   r   Tzresponse.output_item.addedzresponse.content_part.added)rw   r   r   r   )rw   r  r  r
  r  r  zresponse.output_text.delta)rw   r  r  r  r
  r   r   )rw   r   r   r   zresponse.reasoning_text.delta)rw   r
  r  r  r   r  r{   zbrowser.searchquery)rw   r  openZ	open_pagezcursor:cursor)rw   urlfindpattern)rw   r  r  zUnknown function name: Zweb_search_call)rw   r   actionr   z$response.web_search_call.in_progress)rw   r  r  r
  z"response.web_search_call.searchingz"response.web_search_call.completedr|   Zcode_interpreter_callrP   )rw   r   codeZcontainer_idr   r   z*response.code_interpreter_call.in_progressz(response.code_interpreter_call_code.done)rw   r  r  r
  r  z+response.code_interpreter_call.interpretingz(response.code_interpreter_call.completedc                   S  s   d S r   rh   rh   rh   rh   ri   empty_async_generator  s   zPOpenAIServingResponses.responses_stream_generator.<locals>.empty_async_generator)r   zresponse.completed):r   r"   r   r   r   r   r1   r   Z
model_dumpr   r   Zis_expecting_startr   r   r   Z	recipientr   r   r   r   r   r   r   r   openai_responses_typesZResponseTextDoneEventZResponseContentPartDoneEventr   Zlast_content_deltaZcurrent_channelZcurrent_recipientZResponseOutputItemAddedEventZResponseContentPartAddedEventZResponseTextDeltaEventr   Zis_assistant_action_turnrE   r   r   jsonloadsZresponse_function_web_searchZActionSearchZActionOpenPager   Z
ActionFindr   ZResponseFunctionWebSearchZ$ResponseWebSearchCallInProgressEventZ#ResponseWebSearchCallSearchingEventZ#ResponseWebSearchCallCompletedEventZ$ResponseCodeInterpreterToolCallParamZ*ResponseCodeInterpreterCallInProgressEventZ(ResponseCodeInterpreterCallCodeDoneEventZ,ResponseCodeInterpreterCallInterpretingEventZ)ResponseCodeInterpreterCallCompletedEventr   ZResponseCompletedEvent)rc   rj   r   r   r   r   r   r   r   r	  Zcurrent_content_indexZcurrent_output_indexZcurrent_item_idZsent_output_item_addedZinitial_responsectxZprevious_itemr   Ztext_contentZfunction_namer  Zparsed_argsr  Zfinal_responserh   r  ri   r     s"  
	



	











	


	

    M

z1OpenAIServingResponses.responses_stream_generatorr   )/__name__
__module____qualname__r   r   r4   r
   r+   r   r   boolr6   rS   r0   r   r   r   r1   r,   r   r=   r   r   r<   r   r   r/   r   r   r9   listr   r   r    r   r   r   r   r   r   r   r   r   r   r  r   __classcell__rh   rh   rf   ri   r?   D   s$   	
f
 R

	


L
D

$
L




 
	

r?   )er^   r  r   collections.abcr   r   
contextlibr   r   httpr   typingr   r   r	   r
   r   r   Zopenai.types.responsestypes	responsesr  Zfastapir   Zopenair   r   r   r   r   r   r   r   r   r   r   Z.openai.types.responses.response_reasoning_itemr   r   Zopenai_harmonyr   r   Zvllmr   Zvllm.configr   Zvllm.engine.protocolr   Zvllm.entrypoints.chat_utilsr   r   Zvllm.entrypoints.contextr   r    r!   r"   Zvllm.entrypoints.harmony_utilsr#   r$   r%   r&   r'   r(   r)   r*   Zvllm.entrypoints.loggerr+   Z vllm.entrypoints.openai.protocolr,   r-   r.   r/   r0   r1   r2   Z&vllm.entrypoints.openai.serving_enginer3   Z&vllm.entrypoints.openai.serving_modelsr4   Zvllm.entrypoints.tool_serverr5   r6   Zvllm.inputs.datar7   r   Zvllm.loggerr8   Zvllm.outputsr9   Zvllm.reasoningr:   r;   Zvllm.sampling_paramsr<   Z!vllm.transformers_utils.tokenizerr=   Z
vllm.utilsr>   r  rW   r?   rh   rh   rh   ri   <module>   sF   0
($