o
    )ib                    @   s  U d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ zd dlmZ W n eyb   d d	lm Z Y nw d d
l!m"Z" d dl#m$Z$ d dl%m&Z&m'Z' d dl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5m6Z6 d dl7m8Z8m9Z9 d dl:m;Z; d dl<m=Z= d dl>m?Z?m@Z@mAZAmBZB d dlCmDZD d dlEmFZFmGZG e;eHZIeJejKZLG dd de)ZMG dd deMZNG dd deMZOG dd deMZPG dd  d eMZQG d!d" d"eMZRG d#d$ d$eMZSG d%d& d&eMZTG d'd( d(e)ZUG d)d* d*eMZVG d+d, d,eMZWG d-d. d.eMZXG d/d0 d0eMZYe
eYeXf ZZG d1d2 d2eMZ[G d3d4 d4eMZ\G d5d6 d6eMZ]G d7d8 d8eMZ^G d9d: d:eMZ_G d;d< d<e)Z`eae
ebe`f  Zcd=e	ec d>e	eb d?e	eae  fd@dAZde
eeef Zee1efdB< G dCdD dDeMZgG dEdF dFeMZhG dGdH dHeMZiG dIdJ dJeMZjG dKdL dLeMZke
ejekf ZlejZmekZne
emenf ZoG dMdN dNeMZpG dOdP dPeMZqG dQdR dRe)ZrG dSdT dTe)ZsG dUdV dVe)ZtG dWdX dXeMZuG dYdZ dZeMZvG d[d\ d\eMZwG d]d^ d^eMZxG d_d` d`eMZyG dadb dbeMZzG dcdd ddeMZ{G dedf dfeMZ|G dgdh dheMZ}G didj djeMZ~G dkdl dleMZG dmdn dneMZG dodp dpeMZG dqdr dreMZG dsdt dteMZG dudv dveMZG dwdx dxeMZG dydz dze)ZG d{d| d|eMZG d}d~ d~e)ZG dd deMZG dd deMZG dd deZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZe
ehelepeqf ZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZe
eef ZG dd deMZG dd deMZG dd deMZG dd deMZG dd de)ZG dd de)Zed Ze1efd< G dd deMZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZG dd deMZG ddĄ deMZG ddƄ deMZG ddȄ deMZG ddʄ deMZG dd̄ deMZdS )    N)
HTTPStatus)	AnnotatedAnyClassVarLiteralOptionalUnion)HTTPException
UploadFile)ChatCompletionAudio)
Annotation)ResponseFunctionToolCallResponseInputItemParamResponseOutputItemResponsePromptResponseReasoningItemResponseStatus)ResponseTextConfig)ResponseFormatTextConfig)
ToolChoice)Tool)Metadata	Reasoning)	BaseModel
ConfigDictFieldTypeAdapterValidationInfofield_validatormodel_validator)	TypeAlias)envs)ChatCompletionMessageParamrandom_tool_call_id)ScoreContentPartParamScoreMultiModalParam)init_logger)PoolingParams)BeamSearchParamsGuidedDecodingParamsRequestOutputKindSamplingParams)Logprob)random_uuidresolve_obj_by_qualnamec                   @   sF   e Zd ZU eddZdZeeee	   e
d< eddedd ZdS )	OpenAIBaseModelallowextraNfield_nameswrapmodec                    s   ||}t |ts|S | j  d u r4t  | j D ]\}} | t|dd  }r0 | q | _t fdd|D rIt	
d|    |S )Naliasc                 3   s    | ]}| vV  qd S N ).0kr3   r9   l/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/openai/protocol.py	<genexpr>P   s    z7OpenAIBaseModel.__log_extra_fields__.<locals>.<genexpr>z@The following fields were present in the request but ignored: %s)
isinstancedictr3   setZmodel_fieldsitemsaddgetattranyloggerwarningkeys)clsdatahandlerresult
field_namefieldr7   r9   r<   r=   __log_extra_fields__?   s$   



z$OpenAIBaseModel.__log_extra_fields__)__name__
__module____qualname__r   model_configr3   r   r   rA   str__annotations__r   classmethodrO   r9   r9   r9   r=   r/   8   s   
 
r/   c                   @   s6   e Zd ZU eed< eed< dZee ed< eed< dS )	ErrorInfomessagetypeNparamcode)rP   rQ   rR   rT   rU   rZ   r   intr9   r9   r9   r=   rW   Y   s
   
 rW   c                   @      e Zd ZU eed< dS )ErrorResponseerrorN)rP   rQ   rR   rW   rU   r9   r9   r9   r=   r^   `      
 r^   c                   @   s   e Zd ZU edd dZeed< dZeed< edd dZe	ed< d	Z
eed
< dZeed< dZeed< d	Zeed< dZeed< d	Zeed< dZeed< dZee ed< d	Zeed< dS )ModelPermissionc                   C      dt   S )Nz
modelperm-r-   r9   r9   r9   r=   <lambda>e       zModelPermission.<lambda>default_factoryidZmodel_permissionobjectc                   C      t t S r8   r\   timer9   r9   r9   r=   rd   g   re   createdFallow_create_engineTallow_samplingallow_logprobsallow_search_indices
allow_viewallow_fine_tuning*organizationNgroupis_blocking)rP   rQ   rR   r   rh   rT   rU   ri   rm   r\   rn   boolro   rp   rq   rr   rs   ru   rv   r   rw   r9   r9   r9   r=   ra   d   s   
 ra   c                   @   s   e Zd ZU eed< dZeed< edd dZeed< dZ	eed	< d
Z
ee ed< d
Zee ed< d
Zee ed< eedZee ed< d
S )	ModelCardrh   modelri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd   v   re   zModelCard.<lambda>rf   rm   vllmowned_byNrootparentmax_model_len
permission)rP   rQ   rR   rT   rU   ri   r   rm   r\   r|   r}   r   r~   r   listr   ra   r9   r9   r9   r=   ry   s   s   
 ry   c                   @   s0   e Zd ZU dZeed< eedZee	 ed< dS )	ModelListr   ri   rf   rJ   N)
rP   rQ   rR   ri   rT   rU   r   r   rJ   ry   r9   r9   r9   r=   r   ~   s   
 r   c                   @   s   e Zd ZU dZee ed< dS )PromptTokenUsageInfoNcached_tokens)rP   rQ   rR   r   r   r\   rU   r9   r9   r9   r=   r      s   
 r   c                   @   sF   e Zd ZU dZeed< dZeed< dZee ed< dZ	ee
 ed< dS )	UsageInfor   prompt_tokenstotal_tokenscompletion_tokensNprompt_tokens_details)rP   rQ   rR   r   r\   rU   r   r   r   r   r   r9   r9   r9   r=   r      s
   
 r   c                   @   s&   e Zd ZU eed< dZee ed< dS )RequestResponseMetadata
request_idNfinal_usage_info)rP   rQ   rR   rT   rU   r   r   r   r9   r9   r9   r=   r         
 r   c                   @   sV   e Zd ZU eed< dZee ed< edddZee	ee
f  ed< dZee ed< dS )JsonSchemaResponseFormatnameNdescriptionschemadefaultr7   json_schemastrict)rP   rQ   rR   rT   rU   r   r   r   r   r@   r   r   rx   r9   r9   r9   r=   r      s
   
  r   c                   @   s>   e Zd ZU eed< edddZeeee	f  ed< eed< dS )StructuralTagbeginNr   r   structural_tag_schemaend)
rP   rQ   rR   rT   rU   r   r   r   r@   r   r9   r9   r9   r=   r      s   
 r   c                   @   s2   e Zd ZU ed ed< ee ed< ee ed< dS )StructuralTagResponseFormatstructural_tagrY   
structurestriggersN)rP   rQ   rR   r   rU   r   r   rT   r9   r9   r9   r=   r      s   
 r   c                   @   s*   e Zd ZU ed ed< dZee ed< dS )ResponseFormat)textjson_objectr   rY   Nr   )rP   rQ   rR   r   rU   r   r   r   r9   r9   r9   r=   r      s   
 r   c                   @   s.   e Zd ZU dZee ed< dZee ed< dS )StreamOptionsTinclude_usageFcontinuous_usage_statsN)rP   rQ   rR   r   r   rx   rU   r   r9   r9   r9   r=   r         
 r   c                   @   s>   e Zd ZU eed< dZee ed< dZeeee	f  ed< dS )FunctionDefinitionr   Nr   
parameters)
rP   rQ   rR   rT   rU   r   r   r   r@   r   r9   r9   r9   r=   r         
 r   c                   @   s&   e Zd ZU dZed ed< eed< dS )ChatCompletionToolsParamfunctionrY   N)rP   rQ   rR   rY   r   rU   r   r9   r9   r9   r=   r      s   
 r   c                   @   r]   )ChatCompletionNamedFunctionr   NrP   rQ   rR   rT   rU   r9   r9   r9   r=   r      r`   r   c                   @   s&   e Zd ZU eed< dZed ed< dS )"ChatCompletionNamedToolChoiceParamr   rY   N)rP   rQ   rR   r   rU   rY   r   r9   r9   r9   r=   r      r   r   c                   @   sL   e Zd ZU eed< dZeee  ed< dZ	ee
eef  ed< eddZdS )LogitsProcessorConstructorqualnameNargskwargsZforbidr1   )rP   rQ   rR   rT   rU   r   r   r   r   r   r@   r   rS   r9   r9   r9   r=   r      s
   
 r   
processorspatternreturnc                 C   s   | rZ|rZg }| D ]O}t |tr|n|j}t||s"td| dzt|}W n ty? } ztd| d| |d }~ww t |trR||j	pJg i |j
pOi }|| q|S | r`tdd S )NzLogits processor 'ze' is not allowed by this server. See --logits-processor-pattern engine argument for more information.z' could not be resolved: zThe `logits_processors` argument is not supported by this server. See --logits-processor-pattern engine argugment for more information.)r?   rT   r   rematch
ValueErrorr.   	Exceptionr   r   r   append)r   r   logits_processors	processorr   Zlogits_processorer9   r9   r=   get_logits_processors   sB   

r   ResponseInputOutputItemc                   @   sT  e Zd ZU dZee ed< dZeee	d f  ed< e
eee f ed< dZee ed< dZee ed< dZee ed	< dZee ed
< dZee ed< dZee ed< dZee ed< dZee ed< dZee ed< dZe	d ed< dZee ed< dZee ed< dZee ed< dZee  ed< dZ!e"ed< e#edZ$ee% ed< dZ&ee ed< dZ'ee ed< dZ(ee	d  ed < dZ)ee ed!< e#d"d# d$d%Z*eed&< e#dd'd(Z+ee,ee-f  ed)< e#dd*d(Z.eed+< e#dd,d(Z/ee ed-< d.d.d/Z0	d=d0ed1ee, d2e1fd3d4Z2e3d5d6d7d8 Z4e3d5d6d9d: Z5e3d5d6d;d< Z6dS )>ResponsesRequestF
backgroundN)zcode_interpreter_call.outputsz%computer_call_output.output.image_urlzfile_search_call.resultszmessage.input_image.image_urlzmessage.output_text.logprobszreasoning.encrypted_contentincludeinputinstructionsmax_output_tokensmax_tool_callsmetadatarz   Tparallel_tool_callsprevious_response_idprompt	reasoningautor   r   Zflexscalepriorityservice_tierstorestreamtemperaturer   tool_choicerf   toolsr   top_logprobstop_pdisabledr   r   
truncationuserc                   C   rb   NZresp_rc   r9   r9   r9   r=   rd   %  re   zResponsesRequest.<lambda>The request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.rg   r   r   .Additional kwargs to pass to the HF processor.r   r   mm_processor_kwargsThe priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.r   P  If specified, the prefix cache will be salted with the provided string to prevent an attacker to guess prompts in multi-user environments. The salt should be random, protected from access by 3rd parties, and long enough to be unpredictable (e.g., 43 characters base64-encoded, corresponding to 256 bit). Not supported by vLLM engine V0.
cache_salt      ?)r   r   default_max_tokensdefault_sampling_paramsr   c           	   	   C   s   | j d u r|}nt| j |}|pi }| j }d u r"|d| jd }| j }d u r2|d| jd }|d}d }| jd ur^| jjd ur^| jj}|jdkrUt	j
|jd}n	|jdkr^tdtj
|||| j|| jrotj|dS tj|dS )	Nr   r   stop_token_idsr   )jsonr   zjson_object is not supported)r   r   
max_tokenslogprobsr   output_kindguided_decoding)r   minr   get_DEFAULT_SAMPLING_PARAMSr   r   formatrY   r)   from_optionalZschema_NotImplementedErrorr+   r   r   r*   DELTA
FINAL_ONLY)	selfr   r   r   r   r   r   r   response_formatr9   r9   r=   to_sampling_paramsF  sF   





z#ResponsesRequest.to_sampling_paramsbeforer5   c                 C   s&   | ds|S | ddstd|S )Nr   r   Tz0background can only be used when `store` is truer   r   rI   rJ   r9   r9   r=   validate_backgroundo  s   
z$ResponsesRequest.validate_backgroundc                 C   s   | dd urtd|S )Nr   z prompt template is not supportedr   r   r9   r9   r=   validate_promptx  s   z ResponsesRequest.validate_promptc                 C   >   | dd urtjstdt|d tr|d std|S Nr   zYParameter 'cache_salt' is not supported with this instance of vLLM, which uses engine V0.z>Parameter 'cache_salt' must be a non-empty string if provided.r   r!   ZVLLM_USE_V1r   r?   rT   r   r9   r9   r=   check_cache_salt_support~  s   z)ResponsesRequest.check_cache_salt_supportr8   )7rP   rQ   rR   r   r   rx   rU   r   r   r   r   rT   r   r   r   r\   r   r   r   rz   r   r   r   r   r   r   r   r   r   r   floatr   r   r   r   r   r   r   r   r   r   r   r   r   r@   r   r   r   r   r+   r   r   r   r   r   r9   r9   r9   r=   r      s~   
 



)

r   c                   @   s  e Zd ZU ee ed< dZee ed< dZ	ee
 ed< dZeeee
f  ed< dZee ed< d	Zee ed
< edddZee ed< dZee ed< dZee ed< dZee
 ed< dZee ed< edejejdZee ed< g Zeeeee f  ed< dZee ed< dZee  ed< dZ!ee
 ed< dZ"ee
 ed< dZ#eee$  ed< dZ%eee&d e&d e&d e'f  ed< dZ(ee&d  ed < d!Z)eed"< dZ*ee ed#< dZ+ee ed$< dZ,ee ed%< dZ-eed&< dZ.ee ed'< dZ/ee
 ed(< dZ0ee
 ed)< d*Z1e
ed+< g Z2eee  ed,< dZ3eed-< dZ4eed.< d	Z5eed/< d!Z6eed0< d!Z7eed1< dZ8ee9eedd2f  ed3< dZ:ee ed4< dZ;eee  ed5< eed6Z<ee ed7< edd8d9Z=eed:< ed!d;d9Z>eed<< edd=d9Z?eed>< edd?d9Z@eed@< eddAd9ZAeeeeef   edB< eddCd9ZBee edD< eddEd9ZCeeeeDf  edF< eddGd9ZEeeeeDf  edH< eddId9ZFeeeeeGf  edJ< eddKd9ZHee edL< eddMd9ZIeee  edN< eddOd9ZJee edP< eddQd9ZKee edR< eddSd9ZLee edT< eddUd9ZMee edV< ed	dWd9ZNeedX< edYdZ d[d\ZOeed]< edd^d9ZPeeQ ed_< edd`d9ZRee eda< eddbd9ZSee edc< edddd9ZTeeeeDf  ede< eddfd9ZUeeeeeee
f f  edg< d*d*d*d	ddhZVeedi< dedjedkeWfdldmZXdednee djedkeYfdodpZZdkeeeeeGf  fdqdrZ[e\dsdte]dudv Z^e\dsdte]dwdx Z_e\dsdte]dydz Z`e\dsdte]d{d| Zae\dsdte]d}d~ Zbe\dsdte]dd ZcdS )ChatCompletionRequestmessagesNrz           frequency_penalty
logit_biasFr   r   r   zDmax_tokens is deprecated in favor of the max_completion_tokens field)r   
deprecatedr   max_completion_tokens   npresence_penaltyr   geleseedstopr   stream_optionsr   r   r   noner   requiredr   )lowZmediumhighreasoning_effortTinclude_reasoningr   r   best_ofuse_beam_searchtop_kmin_prepetition_penaltyr   length_penaltyr   include_stop_str_in_output
ignore_eos
min_tokensskip_special_tokensspaces_between_special_tokensr  truncate_prompt_tokensprompt_logprobsallowed_token_idsrf   	bad_wordszaIf true, the new message will be prepended with the last message if they belong to the same role.r   echoIf true, the generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.add_generation_prompt:  If this is set, the chat will be formatted so that the final message in the chat is open-ended, without any EOS tokens. The model will continue this message rather than starting a new one. This allows you to "prefill" part of the model's response for it. Cannot be used at the same time as `add_generation_prompt`.continue_final_messageIf true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).add_special_tokensa#  A list of dicts representing documents that will be accessible to the model if it is performing RAG (retrieval-augmented generation). If the template does not support RAG, this argument will have no effect. We recommend that each document should be a dict containing "title" and "text" keys.	documentsA Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one.chat_templatebAdditional keyword args to pass to the template renderer. Will be accessible by the chat template.chat_template_kwargsr   r   5If specified, the output will follow the JSON schema.guided_json7If specified, the output will follow the regex pattern.guided_regex<If specified, the output will be exactly one of the choices.guided_choice>If specified, the output will follow the context free grammar.guided_grammarz?If specified, the output will follow the structural tag schema.r   zIf specified, will override the default guided decoding backend of the server for this specific request. If set, must be either 'outlines' / 'lm-format-enforcer'guided_decoding_backendTIf specified, will override the default whitespace pattern for guided json decoding.guided_whitespace_patternr   r   c                   C      t   S r8   rc   r9   r9   r9   r=   rd   ,      zChatCompletionRequest.<lambda>r   r   r     A list of either qualified names of logits processors, or constructor objects, to apply when sampling. A constructor is a JSON object with a required 'qualname' field specifying the qualified name of the processor class/factory, and optional 'args' and 'kwargs' fields containing positional and keyword arguments. For example: {'qualname': 'my_module.MyLogitsProcessor', 'args': [1, 2], 'kwargs': {'param': 'value'}}.r   If specified with 'logprobs', tokens are represented  as strings of the form 'token_id:{token_id}' so that tokens that are not JSON-encodable can be identified.return_tokens_as_token_idsr   r   5KVTransfer parameters used for disaggregated serving.kv_transfer_paramsWAdditional request parameters with string or numeric values, used by custom extensions.
vllm_xargsr  r   r   r  r  r   r   r   c                 C   sN   | j d ur| j nd}| j }d u r|d| jd }t||| j|| j| jdS )Nr  r   Z
beam_widthr   r  r   r  r  )r	  r   r   r   r(   r  r  r  r   r   r   r	  r   r9   r9   r=   to_beam_search_paramsa  s   
z+ChatCompletionRequest.to_beam_search_paramslogits_processor_patternc              
   C   s  | j  }d u r|d| jd }| j }d u r |d| jd }| j }d u r0|d| jd }| j }d u r@|d| jd }| j }d u rP|d| jd }| j}	|	d u r]| jr]| j	}	d }
| j
d ur| j
jdkrmd}
n5| j
jdkr| j
j}|d us}J |j| _n | j
jd	kr| j
}|d urt|tsJ |jdd
}t|| _tj|  p| j| j| j| j|
| j| j| jd}| jr| jni }| jr| j|d< tjd$i d| jd| j d| j!d| j"d|d|d|d|d|d| j#d| j$d| j%d| j&r| j	nd d|	d| j'd|d| j(d| j)d| j*dt+| j,|d| j-d| j.d| j/r0t0j1nt0j2d|d | j3d!| j4d"| j5d#|pJd S S )%Nr  r   r   r  r  r   Tr   r   )Zby_alias)r   regexchoicegrammarr   backendwhitespace_patternr   rD  r	  r  r
  r  r  r  r   r   r$  r  r   r  r   r!  r   r  r#  r   r   r  r&  r%  
extra_argsr9   )6r  r   r   r   r   r  r  r$  r'  r   r   rY   r   r4  r?   r   Z
model_dumpr   dumpsr   r)   r   _get_guided_json_from_toolr6  r8  r:  r;  r=  rF  rD  r+   r	  r  r
  r  r  r  r   r   r  r  r   r!  r   r   r  r#  r   r*   r   r   r  r&  r%  )r   r   rK  r   r  r   r   r  r  r$  guided_json_objectr   r   Z	s_tag_objr   rQ  r9   r9   r=   r   s  s   








	

z(ChatCompletionRequest.to_sampling_paramsc                    s   | j dks
| jd u rd S t| j tu r3| j jj}dd | jD }||vr,td| d|| }|jS | j dkrjdtdt	fd	d
 dt
t dt	fdd}ddd fdd| jD dd}|| j}|rh||d< |S d S )Nr  c                 S   s   i | ]}|j j|j qS r9   )r   r   r:   toolr9   r9   r=   
<dictcomp>  s    zDChatCompletionRequest._get_guided_json_from_tool.<locals>.<dictcomp>zTool 'z!' has not been passed in `tools`.r  rV  r   c                 S   s6   d| j jgd| j jr| j jndi ddddgdS )	Nstring)rY   enumri   )rY   
properties)r   r   r   r   )rZ  r  )r   r   r   )rV  r9   r9   r=   get_tool_schema  s   	
zIChatCompletionRequest._get_guided_json_from_tool.<locals>.get_tool_schemar   c                 S   s   t tt ttf f  }| D ]0}|jjd u rq|jjdi }| D ]\}}||v r8|| |kr8td| d|||< q"q|S )N$defszTool definition 'z/' has multiple schemas, which is not supported.)r@   rT   r   r   r   poprB   r   )r   Zall_defsrV  ZdefsZdef_nameZ
def_schemar9   r9   r=   get_tool_schema_defs  s"   


	zNChatCompletionRequest._get_guided_json_from_tool.<locals>.get_tool_schema_defsarrayr  ri   c                    s   g | ]} |qS r9   r9   rU  r[  r9   r=   
<listcomp>  s    zDChatCompletionRequest._get_guided_json_from_tool.<locals>.<listcomp>)rY   ZanyOf)rY   ZminItemsrB   r\  )r   r   rY   r   r   r   r   r   r   r@   r   )r   Z	tool_namer   rV  r^  r   Zjson_schema_defsr9   r`  r=   rS    s:   




z0ChatCompletionRequest._get_guided_json_from_toolr   r5   c                 C       | dr| dstd|S Nr  r   6Stream options can only be defined when `stream=True`.r   r   r9   r9   r=   validate_stream_options  
   z-ChatCompletionRequest.validate_stream_optionsc                 C   s|   | d }d ur| dr|dkrtd|dk rtd| d }d ur<|dk r/td|dkr<| ds<td	|S )
Nr$  r   r   7`prompt_logprobs` are not available when `stream=True`.+`prompt_logprobs` must be a positive value.r   z(`top_logprobs` must be a positive value.r   z:when using `top_logprobs`, `logprobs` must be set to true.r   )rI   rJ   r$  r   r9   r9   r=   check_logprobs#  s   z$ChatCompletionRequest.check_logprobsc                 C   s   t |tr|td|v o|d d ud|v o|d d ud|v o#|d d ug}|dkr.td|dkr>|dddvr>td	|S )
Nr4  r6  r8  r  `You can only use one kind of guided decoding ('guided_json', 'guided_regex' or 'guided_choice').r   r  )r  r   r  z;You can only either use guided decoding or tools, not both.)r?   r   sumr   rI   rJ   Zguide_countr9   r9   r=   check_guided_decoding_count9  s    
z1ChatCompletionRequest.check_guided_decoding_countc                 C   s  d|vr| drd|d< d|v r|d dkr|S d|v r|d d urd|vs-|d d u r1td|d dvrHt|d tsHtd|d  d|d d	krft|d trft|d d
krfd|d< |d= |S d}t|d trd}|d  d}t|tstd| d| d|vrtd| |d }t|trt|d
krtd| d| |d D ]}|d d |krd} nq|std|S )Nr   r   r   r  z.When using `tool_choice`, `tools` must be set.)r   r  z!Invalid value for `tool_choice`: z?! Only named tools, "none", "auto" or "required" are supported.r  r   zJCorrect usage: `{"type": "function", "function": {"name": "my_function"}}`Fr   zInvalid value for `function`: `z` in `tool_choice`! r   z6Expected field `name` in `function` in `tool_choice`! zInvalid `name` in `function`: `TzOThe tool specified in `tool_choice` does not match any of the specified `tools`)r   r   r?   r@   r   lenrT   )rI   rJ   Zcorrect_usage_messageZ
valid_toolr   Zfunction_namerV  r9   r9   r=   check_tool_usageS  sp   	
z&ChatCompletionRequest.check_tool_usagec                 C       | dr| drtd|S Nr+  r)  zMCannot set both `continue_final_message` and `add_generation_prompt` to True.r   r   r9   r9   r=   check_generation_prompt  
   z-ChatCompletionRequest.check_generation_promptc                 C   r   r   r   r   r9   r9   r=   r        z.ChatCompletionRequest.check_cache_salt_support)drP   rQ   rR   r   r"   rU   rz   r   rT   r  r   r  r@   r   rx   r   r\   r   r   r  r	  r
  r   AnyResponseFormat
_LONG_INFOr   maxr  r  r   r   r  r   r   r   r   r   r   r   r   r  r  r   r   r  r  r  r  r  r  r   r  r  r  r   r!  r#  r   r$  r%  r&  r'  r)  r+  r-  r.  r0  r2  r   r   r4  r   r6  r8  r:  r   r;  r=  r   r   r   LogitsProcessorsrB  r   rD  rF  r   r(   rJ  r+   r   rS  r   rV   re  ri  rm  ro  rr  r   r9   r9   r9   r=   r    sj  
 
				$


]
JEr  c                	   @   s  e Zd ZU dZee ed< dZeee	e
 e	e	e
  ee	e f  ed< dZeeee	e f  ed< dZee
 ed< dZee ed< dZee ed	< dZeeeef  ed
< dZee
 ed< dZee
 ed< dZe
ed< dZee ed< edejejdZee
 ed< g Zeeee	e f  ed< dZee ed< dZee  ed< dZ!ee ed< dZ"ee ed< dZ#ee ed< dZ$ee ed< dZ%eed< dZ&ee
 ed< dZ'ee ed< dZ(ee ed< dZ)eed< g Z*ee	e
  ed < dZ+eed!< dZ,eed"< d#Z-e
ed$< d%Z.eed&< d%Z/eed'< dZ0ee1e
edd(f  ed)< dZ2ee	e
  ed*< dZ3ee
 ed+< ed%d,d-Z4eed.< edd/d-Z5ee6 ed0< edd1d-Z7eeeee8f  ed2< edd3d-Z9ee ed4< edd5d-Z:ee	e  ed6< edd7d-Z;ee ed8< edd9d-Z<ee ed:< edd;d-Z=ee ed<< ed#d=d-Z>e
ed>< ed?d@ dAdBZ?eedC< eddDd-Z@eeA edE< eddFd-ZBee edG< eddHd-ZCee edI< eddJd-ZDeeeeEf  edK< eddLd-ZFeeeeee
ef f  edM< dddd#ddNZGeedO< 	dcde
dPee dQeHfdRdSZI	dcde
dTee dPee dQeJfdUdVZKeLdWdXeMdYdZ ZNeLdWdXeMd[d\ ZOeLdWdXeMd]d^ ZPeLdWdXeMd_d` ZQeLdWdXeMdadb ZRdS )dCompletionRequestNrz   r   prompt_embedsr  Fr'  r  r  r  r      r   r  r	  r
  r  r  r  r   r  suffixr   r   r   r  r  r  r  r   r  r   r  r  r   r  Tr   r!  r"  r#  r%  r$  MIf true (the default), special tokens (e.g. BOS) will be added to the prompt.r   r-  zSimilar to chat completion, this parameter specifies the format of output. Only {'type': 'json_object'}, {'type': 'json_schema'}, {'type': 'structural_tag'}, or {'type': 'text' } is supported.r   r3  r4  r5  r6  r7  r8  r9  r:  zIf specified, will override the default guided decoding backend of the server for this specific request. If set, must be one of 'outlines' / 'lm-format-enforcer'r;  r<  r=  r   r   c                   C   r>  r8   rc   r9   r9   r9   r=   rd     r?  zCompletionRequest.<lambda>r   r   r   r@  r   rA  rB  r   r   rC  rD  rE  rF  rG  r   r   r   c                 C   sT   |d u ri }| j d ur| j nd}| j }d u r|dd}t||| j|| j| jdS )Nr  r   r   rH  )r	  r   r   r(   r  r  r  rI  r9   r9   r=   rJ  I  s   z'CompletionRequest.to_beam_search_paramsrK  c              	   C   s0  |d u ri }| j  }d u r|d| jd }| j }d u r&|d| jd }| j }d u r6|d| jd }| j }d u rF|d| jd }| j }d u rV|d| jd }| j}	|	d u rc| jrc| j	}	| joj| j
dk}
d }| jd urz| jjdkrzd}tj| j| j| j| j|| j| jd	}| jr| jni }| jr| j|d
< tjd"i d| jd| jd| jd| jd|d|d|d|d|d| jd| jd| jd| j	d| jd|
s|ndd| j d|	d| j!d| j"d| j#dt$| j%|d| j&d| j'rt(j)nt(j*d|d| j+d | j,d!|pd S S )#Nr  r   r   r  r  r   r   T)r   rL  rM  rN  r   rO  rP  rD  r	  r  r
  r  r  r  r   r   r  r   r  r  r$  r   r!  r  r   r#  r   r   r  r%  rQ  r9   )-r  r   r   r   r   r  r  r$  r'  r   r   r   rY   r)   r   r4  r6  r8  r:  r;  r=  rF  rD  r+   r	  r  r
  r  r  r  r   r  r  r   r!  r  r   r   r#  r   r*   r   r   r  r%  )r   r   rK  r   r  r   r   r  r  r$  Zecho_without_generationrT  r   rQ  r9   r9   r=   r   _  s   







	


z$CompletionRequest.to_sampling_paramsr   r5   c                 C   sR   t d|v o
|d d ud|v o|d d ud|v o|d d ug}|dkr'td|S )Nr4  r6  r8  r  rj  )rk  r   rl  r9   r9   r=   rm    s   z-CompletionRequest.check_guided_decoding_countc                 C   sb   | d }d ur| dr|dkrtd|dk rtd| d }d ur/|dk r/td|S )Nr$  r   r   rg  rh  r   z$`logprobs` must be a positive value.r   )rI   rJ   r$  r   r9   r9   r=   ri    s   z CompletionRequest.check_logprobsc                 C   rb  rc  r   r   r9   r9   r=   re    rf  z)CompletionRequest.validate_stream_optionsc                 C   s(   | dd u r| dd u rtd|S )Nr   rz  z8At least one of `prompt` or `prompt_embeds` must be set.r   r   r9   r9   r=   !validate_prompt_and_prompt_embeds  s
   z3CompletionRequest.validate_prompt_and_prompt_embedsc                 C   r   r   r   r   r9   r9   r=   r     rt  z*CompletionRequest.check_cache_salt_supportr8   )SrP   rQ   rR   rz   r   rT   rU   r   r   r   r\   rz  bytesr  r'  rx   r  r   r  r@   r   r   r	  r
  r   rv  r   rw  r  r  r   r  r   r|  r   r   r   r  r  r  r  r  r   r  r  r  r   r!  r#  r   r%  r$  r-  r   ru  r4  r   r6  r8  r:  r;  r=  r   r   r   rx  rB  r   rD  r   rF  r   r(   rJ  r+   r   r   rV   rm  ri  re  r~  r   r9   r9   r9   r=   ry    s
  
 ,
$


Vry  c                   @   s   e Zd ZU dZee ed< eee	 eee	  eee f ed< dZ
ed ed< dZee	 ed< dZee ed< dZeee	ed	d
f  ed< edddZeed< edddZe	ed< edd ddZeed< dZee ed< dd ZdS )EmbeddingCompletionRequestNrz   r   r   r   base64encoding_format
dimensionsr   r"  r#  Tr}  r   r-  r   r   r   c                   C   r>  r8   rc   r9   r9   r9   r=   rd     r?  z#EmbeddingCompletionRequest.<lambda>r   r   r   	normalizec                 C      t | j| jdS N)r  r  r'   r  r  r   r9   r9   r=   to_pooling_params     z,EmbeddingCompletionRequest.to_pooling_params)rP   rQ   rR   rz   r   rT   rU   r   r   r\   r  r   r  r   r#  r   r   r-  rx   r   r   r  r  r9   r9   r9   r=   r    s*   
 $r  c                   @   s>  e Zd ZU dZee ed< ee ed< dZ	e
d ed< dZee ed< dZee ed< dZeeeed	d
f  ed< edddZeed< edddZee ed< edddZeeeef  ed< edddZeeeef  ed< edddZeed< edd ddZeed< dZee ed< edd ed!d" Zd#d$ ZdS )%EmbeddingChatRequestNrz   r  r   r  r  r  r   r  r"  r#  Fr,  r   r-  r/  r0  r1  r2  r   r   r   r   r   c                   C   r>  r8   rc   r9   r9   r9   r=   rd   H  r?  zEmbeddingChatRequest.<lambda>r   r   r   r  r   r5   c                 C   rp  rq  r   r   r9   r9   r=   rr  Q  rs  z,EmbeddingChatRequest.check_generation_promptc                 C   r  r  r  r  r9   r9   r=   r  Z  r  z&EmbeddingChatRequest.to_pooling_params)rP   rQ   rR   rz   r   rT   rU   r   r"   r  r   r  r\   r   r#  r   r   r-  rx   r0  r2  r@   r   r   r   r   r  r   rV   rr  r  r9   r9   r9   r=   r    sH   
 	r  c                   @   s   e Zd ZU dZee ed< eee ee	f ed< eee ee	f ed< dZ
eeeeddf  ed< eddd	Zeeeef  ed
< eddd	Zeed< dZee ed< dd ZdS )ScoreRequestNrz   Ztext_1Ztext_2r  r"  r#  r   r   r   r   r   r   
activationc                 C      t | jdS N)r  r'   r  r  r9   r9   r=   r       zScoreRequest.to_pooling_params)rP   rQ   rR   rz   r   rT   rU   r   r   r%   r#  r   r\   r   r   r@   r   r   r  rx   r  r9   r9   r9   r=   r  f  s   
 r  c                   @   s   e Zd ZU dZee ed< eeef ed< ee	e ef ed< e
dd dZeed< dZeeee
d	d
f  ed< e
dddZeeeef  ed< e
dddZeed< dZee ed< dd ZdS )RerankRequestNrz   queryr.  c                   C   s   dS )Nr   r9   r9   r9   r9   r=   rd     s    zRerankRequest.<lambda>rf   top_nr  r"  r#  r   r   r   r   r   r   r  c                 C   r  r  r  r  r9   r9   r=   r    r  zRerankRequest.to_pooling_params)rP   rQ   rR   rz   r   rT   rU   r   r%   r   r   r  r\   r#  r   r   r@   r   r   r  rx   r  r9   r9   r9   r=   r    s    
 r  c                   @   s.   e Zd ZU dZee ed< dZee ed< dS )RerankDocumentNr   multi_modal)	rP   rQ   rR   r   r   rT   rU   r  r$   r9   r9   r9   r=   r    r   r  c                   @   s&   e Zd ZU eed< eed< eed< dS )RerankResultindexZdocumentZrelevance_scoreN)rP   rQ   rR   r\   rU   r  r   r9   r9   r9   r=   r    s   
 r  c                   @   r]   )RerankUsager   NrP   rQ   rR   r\   rU   r9   r9   r9   r=   r    r`   r  c                   @   s2   e Zd ZU eed< eed< eed< ee ed< dS )RerankResponserh   rz   usageresultsN)rP   rQ   rR   rT   rU   r  r   r  r9   r9   r9   r=   r    s
   
 r  c                   @   sv   e Zd ZU eedZee ed< eedZee	e
  ed< eedZee ed< eedZee	eee
f   ed< dS )CompletionLogProbsrf   text_offsettoken_logprobstokensr   N)rP   rQ   rR   r   r   r  r\   rU   r  r   r   r  rT   r   r@   r9   r9   r9   r=   r    s   
 
r  c                   @   s~   e Zd ZU eed< eed< dZee ed< dZ	ee ed< e
dddZeeeef  ed< dZeeeeeef    ed	< dS )
CompletionResponseChoicer  r   Nr   finish_reasonThe stop string or token id that caused the completion to stop, None if the completion finished for some other reason including encountering the EOS tokenr   stop_reasonr$  )rP   rQ   rR   r\   rU   rT   r   r   r  r  r   r  r   r$  r   r@   r,   r9   r9   r9   r=   r    s   
 $r  c                   @   s   e Zd ZU edd dZeed< dZed ed< edd dZ	e
ed< eed	< ee ed
< dZeed  ed< dZee ed< eed< edddZeeeef  ed< dS )CompletionResponsec                   C   rb   Nzcmpl-rc   r9   r9   r9   r=   rd     re   zCompletionResponse.<lambda>rf   rh   text_completionri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd     re   rm   rz   choicesNr   r   system_fingerprintr  KVTransfer parameters.r   rD  )rP   rQ   rR   r   rh   rT   rU   ri   r   rm   r\   r   r  r   r   r  r   rD  r@   r   r9   r9   r9   r=   r    s   
 r  c                   @   s^   e Zd ZU eed< eed< dZee ed< dZ	ee ed< e
dddZeeeef  ed< dS )	CompletionResponseStreamChoicer  r   Nr   r  r  r   r  )rP   rQ   rR   r\   rU   rT   r   r   r  r  r   r  r   r9   r9   r9   r=   r    s   
 r  c                   @   sp   e Zd ZU edd dZeed< dZeed< edd dZe	ed< eed	< e
e ed
< eddZee ed< dS )CompletionStreamResponsec                   C   rb   r  rc   r9   r9   r9   r=   rd     re   z!CompletionStreamResponse.<lambda>rf   rh   r  ri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd     re   rm   rz   r  Nr   r  )rP   rQ   rR   r   rh   rT   rU   ri   rm   r\   r   r  r  r   r   r9   r9   r9   r=   r    s   
 r  c                   @   s6   e Zd ZU eed< dZeed< eee	 ef ed< dS )EmbeddingResponseDatar  Z	embeddingri   N
rP   rQ   rR   r\   rU   ri   rT   r   r   r   r9   r9   r9   r=   r       
 r  c                   @   b   e Zd ZU edd dZeed< dZeed< edd dZe	ed< eed	< e
e ed
< eed< dS )EmbeddingResponsec                   C   rb   Nzembd-rc   r9   r9   r9   r=   rd     re   zEmbeddingResponse.<lambda>rf   rh   r   ri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd     re   rm   rz   rJ   r  N)rP   rQ   rR   r   rh   rT   rU   ri   rm   r\   r   r  r   r9   r9   r9   r=   r       
 r  c                   @   s@   e Zd ZU eed< dZeed< eeee	  ee	 ef ed< dS )PoolingResponseDatar  Zpoolingri   rJ   Nr  r9   r9   r9   r=   r    s   
 "r  c                   @   r  )PoolingResponsec                   C   rb   )Nzpool-rc   r9   r9   r9   r=   rd     re   zPoolingResponse.<lambda>rf   rh   r   ri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd     re   rm   rz   rJ   r  N)rP   rQ   rR   r   rh   rT   rU   ri   rm   r\   r   r  r   r9   r9   r9   r=   r  
  r  r  c                   @   s*   e Zd ZU eed< dZeed< eed< dS )ScoreResponseDatar  Zscoreri   N)rP   rQ   rR   r\   rU   ri   rT   r   r9   r9   r9   r=   r    s   
 r  c                   @   r  )ScoreResponsec                   C   rb   r  rc   r9   r9   r9   r=   rd     re   zScoreResponse.<lambda>rf   rh   r   ri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd     re   rm   rz   rJ   r  N)rP   rQ   rR   r   rh   rT   rU   ri   rm   r\   r   r  r   r9   r9   r9   r=   r    r  r  c                   @   s~   e Zd ZU dZee ed< eee ef ed< dZ	ee
 ed< dZee ed< edddZe
ed	< dZee ed
< dd ZdS )ClassificationRequestNrz   r   r#  r   r   r   r   r   r  c                 C   r  r  r  r  r9   r9   r=   r  5  r  z'ClassificationRequest.to_pooling_params)rP   rQ   rR   rz   r   rT   rU   r   r   r#  r\   r   r   r   r  rx   r  r9   r9   r9   r=   r  "  s   
 r  c                   @   s6   e Zd ZU eed< ee ed< ee ed< eed< dS )ClassificationDatar  labelZprobsZnum_classesN)	rP   rQ   rR   r\   rU   r   rT   r   r   r9   r9   r9   r=   r  9  s
   
 r  c                   @   r  )ClassificationResponsec                   C   rb   )Nz	classify-rc   r9   r9   r9   r=   rd   A  re   zClassificationResponse.<lambda>rf   rh   r   ri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd   C  re   rm   rz   rJ   r  N)rP   rQ   rR   r   rh   rT   rU   ri   rm   r\   r   r  r   r9   r9   r9   r=   r  @  r  r  c                   @      e Zd ZU eed< eed< dS )FunctionCallr   	argumentsNr   r9   r9   r9   r=   r  I     
 r  c                   @   s8   e Zd ZU eedZeed< dZe	d ed< e
ed< dS )ToolCallrf   rh   r   rY   N)rP   rQ   rR   r   r#   rh   rT   rU   rY   r   r  r9   r9   r9   r=   r  N  s   
 r  c                   @   s.   e Zd ZU dZee ed< dZee ed< dS )DeltaFunctionCallNr   r  )rP   rQ   rR   r   r   rT   rU   r  r9   r9   r9   r=   r  T  r   r  c                   @   sJ   e Zd ZU dZee ed< dZeed  ed< e	ed< dZ
ee ed< dS )DeltaToolCallNrh   r   rY   r  )rP   rQ   rR   rh   r   rT   rU   rY   r   r\   r   r  r9   r9   r9   r=   r  Z  s
   
 r  c                   @   s2   e Zd ZU eed< ee ed< dZee	 ed< dS )ExtractedToolCallInformationZtools_called
tool_callsNcontent)
rP   rQ   rR   rx   rU   r   r  r  r   rT   r9   r9   r9   r=   r  a  s   
 r  c                   @   s   e Zd ZU eed< dZee ed< dZee ed< dZee	 ed< dZ
ee ed< dZee ed< eedZee ed	< dZee ed
< dS )ChatMessageroleNr  refusalannotationsaudiofunction_callrf   r  reasoning_content)rP   rQ   rR   rT   rU   r  r   r  r  OpenAIAnnotationr  OpenAIChatCompletionAudior  r  r   r   r  r  r  r9   r9   r9   r=   r  m  s   
 r  c                   @   s6   e Zd ZU eed< dZeed< dZee	e
  ed< dS )ChatCompletionLogProbtokeng    logprobNr  )rP   rQ   rR   rT   rU   r  r   r  r   r   r\   r9   r9   r9   r=   r  z  r  r  c                   @   s<   e Zd ZU dZeeee   ed< e	e
dZe
e ed< dS )ChatCompletionLogProbsContentNr3   rf   r   )rP   rQ   rR   r3   r   r   rA   rT   rU   r   r   r   r  r9   r9   r9   r=   r    s   
 r  c                   @   s"   e Zd ZU dZeee  ed< dS )ChatCompletionLogProbsNr  )rP   rQ   rR   r  r   r   r  rU   r9   r9   r9   r=   r    s   
 r  c                   @   sV   e Zd ZU eed< eed< dZee ed< dZ	ee
 ed< dZeeee
f  ed< dS )ChatCompletionResponseChoicer  rX   Nr   r  r  r  )rP   rQ   rR   r\   rU   r  r   r   r  r  rT   r  r   r9   r9   r9   r=   r    s   
 r  c                   @   s   e Zd ZU edd dZeed< dZed ed< edd dZ	e
ed< eed	< ee ed
< dZeed  ed< dZee ed< eed< dZeeeee
ef    ed< edddZeeeef  ed< dS )ChatCompletionResponsec                   C   rb   Nz	chatcmpl-rc   r9   r9   r9   r=   rd     re   zChatCompletionResponse.<lambda>rf   rh   zchat.completionri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd     re   rm   rz   r  Nr   r   r  r  r$  r  r   rD  )rP   rQ   rR   r   rh   rT   rU   ri   r   rm   r\   r   r  r   r   r  r   r$  r@   r,   rD  r   r9   r9   r9   r=   r    s   
  r  c                   @   sT   e Zd ZU dZee ed< dZee ed< dZee ed< e	e
dZe
e ed< dS )DeltaMessageNr  r  r  rf   r  )rP   rQ   rR   r  r   rT   rU   r  r  r   r   r  r  r9   r9   r9   r=   r    s
   
 r  c                   @   sV   e Zd ZU eed< eed< dZee ed< dZ	ee
 ed< dZeeee
f  ed< dS )"ChatCompletionResponseStreamChoicer  deltaNr   r  r  )rP   rQ   rR   r\   rU   r  r   r   r  r  rT   r  r   r9   r9   r9   r=   r    s   
 r  c                   @   t   e Zd ZU edd dZeed< dZed ed< edd dZ	e
ed< eed	< ee ed
< eddZee ed< dS )ChatCompletionStreamResponsec                   C   rb   r  rc   r9   r9   r9   r=   rd     re   z%ChatCompletionStreamResponse.<lambda>rf   rh   zchat.completion.chunkri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd     re   rm   rz   r  Nr  r  )rP   rQ   rR   r   rh   rT   rU   ri   r   rm   r\   r   r  r  r   r   r9   r9   r9   r=   r       
 r  c                   @   >   e Zd ZU eed< dZee ed< dZee	e
ef  ed< dS )!TranscriptionResponseStreamChoicer  Nr  r  rP   rQ   rR   r  rU   r  r   rT   r  r   r\   r9   r9   r9   r=   r    r   r  c                   @   r  )TranscriptionStreamResponsec                   C   rb   )Nztrsc-rc   r9   r9   r9   r=   rd     re   z$TranscriptionStreamResponse.<lambda>rf   rh   ztranscription.chunkri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd     re   rm   rz   r  Nr  r  )rP   rQ   rR   r   rh   rT   rU   ri   r   rm   r\   r   r  r  r   r   r9   r9   r9   r=   r    r  r  c                   @   r]   )InputTokensDetailsr   Nr  r9   r9   r9   r=   r    r`   r  c                   @   r]   )OutputTokensDetailsZreasoning_tokensNr  r9   r9   r9   r=   r    r`   r  c                   @   s6   e Zd ZU eed< eed< eed< eed< eed< dS )ResponseUsageZinput_tokensZinput_tokens_detailsZoutput_tokensZoutput_tokens_detailsr   N)rP   rQ   rR   r\   rU   r  r  r9   r9   r9   r=   r    s   
 r  c                   @   s  e Zd ZU edd dZeed< edd dZeed< dZ	e
e ed< dZe
e ed	< eed
< dZed ed< ee ed< eed< eed< eed< ee ed< eed< eed< eed< dZe
e ed< dZe
e ed< dZe
e ed< dZe
e ed< ed ed< eed< dZe
e ed< eed< ed ed< dZe
e ed < dZ e
e ed!< e!	d)d"e"d#e#d$ed%edee ded e
e d&d fd'd(Z$dS )*ResponsesResponsec                   C   rb   r   rc   r9   r9   r9   r=   rd     re   zResponsesResponse.<lambda>rf   rh   c                   C   rj   r8   rk   r9   r9   r9   r=   rd     re   
created_atNr   r   rz   responseri   outputr   r   r   r   r   r   r   r   r   r   r   r   r   statusr   r   r   r   r  r   requestsampling_params
model_namecreated_timer   c                 C   s   | di d|j d|d|jd|jd|d|d|jd|jd	|jd
|jd|jd|jd|j	d|j
d|jd|jd|jd|jd|d|jd|jd|jd|jd|S )Nrh   r  r   r   rz   r  r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r  r9   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )rI   r  r  r  r  r  r  r  r9   r9   r=   from_request  sb   	
zResponsesResponse.from_requestr8   )%rP   rQ   rR   r   rh   rT   rU   r  r\   r   r   r   r   ri   r   r   r   rx   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r   rV   r   r+   r  r9   r9   r9   r=   r    sZ   
 		r  c                   @   sT   e Zd ZU dZeed< eed< eed< eed< edddede	d	e
fd
dZdS )BatchRequestInputz
    The per-line object of the batch input file.

    NOTE: Currently only the `/v1/chat/completions` endpoint is supported.
    	custom_idmethodurlbodyplainr5   valueinfoc                 C   sh   |j d }|dkrt|S |dkrtt|S |dr#t|S |dr-t|S tt	|S )Nr  z/v1/chat/completionsz/v1/embeddingsz/scorez/rerank)
rJ   r  Zmodel_validater   EmbeddingRequestZvalidate_pythonendswithr  r  BatchRequestInputBody)rI   r  r  r  r9   r9   r=   check_type_for_url;  s   





z$BatchRequestInput.check_type_for_urlN)rP   rQ   rR   __doc__rT   rU   r  r   rV   r   r   r  r9   r9   r9   r=   r  %  s   
 
r  c                   @   s>   e Zd ZU dZeed< eed< dZee	e
eeef  ed< dS )BatchResponseData   status_coder   Nr  )rP   rQ   rR   r  r\   rU   rT   r  r   r   r  r  r  r  r9   r9   r9   r=   r  K  s   
 
r  c                   @   s:   e Zd ZU dZeed< eed< ee ed< ee ed< dS )BatchRequestOutputzA
    The per-line object of the batch output and error files
    rh   r  r  r_   N)	rP   rQ   rR   r  rT   rU   r   r  r   r9   r9   r9   r=   r  W  s   
 r  c                   @   sR   e Zd ZU dZee ed< eed< edddZe	ed< edd	dZ
ee	 ed
< dS )TokenizeCompletionRequestNrz   r   Tr}  r   r-  FFIf true, also return the token strings corresponding to the token ids.return_token_strs)rP   rQ   rR   rz   r   rT   rU   r   r-  rx   r  r9   r9   r9   r=   r  i  s   
 r  c                   @   s  e Zd ZU dZee ed< ee ed< e	dddZ
eed< e	dd	dZee ed
< e	dddZeed< e	dddZeed< e	dddZee ed< e	dddZeeeef  ed< e	dddZeeeef  ed< e	dddZeee  ed< eddedd ZdS )TokenizeChatRequestNrz   r  Tr(  r   r)  Fr  r  r*  r+  r,  r-  r/  r0  r1  r2  r   r   z#A list of tools the model may call.r   r   r5   c                 C   rp  rq  r   r   r9   r9   r=   rr    rs  z+TokenizeChatRequest.check_generation_prompt)rP   rQ   rR   rz   r   rT   rU   r   r"   r   r)  rx   r  r+  r-  r0  r2  r@   r   r   r   r   r   rV   rr  r9   r9   r9   r=   r  z  sL   
 		r  c                   @   s>   e Zd ZU eed< eed< ee ed< dZeee  ed< dS )TokenizeResponsecountr   r  N
token_strs)	rP   rQ   rR   r\   rU   r   r	  r   rT   r9   r9   r9   r=   r    s
   
 r  c                   @   s*   e Zd ZU dZee ed< ee ed< dS )DetokenizeRequestNrz   r  )	rP   rQ   rR   rz   r   rT   rU   r   r\   r9   r9   r9   r=   r
    s   
 r
  c                   @   r]   )DetokenizeResponser   Nr   r9   r9   r9   r=   r    r`   r  c                   @   s$   e Zd ZU dZeddZeed< dS )TokenizerInfoResponsez^
    Response containing tokenizer configuration 
    equivalent to tokenizer_config.json
    r0   r1   Ztokenizer_classN)rP   rQ   rR   r  r   rS   rT   rU   r9   r9   r9   r=   r    s   
 
r  c                   @   r  )LoadLoRAAdapterRequest	lora_nameZ	lora_pathNr   r9   r9   r9   r=   r    r  r  c                   @   s,   e Zd ZU eed< eddZee ed< dS )UnloadLoRAAdapterRequestr  Nr  lora_int_id)	rP   rQ   rR   rT   rU   r   r  r   r\   r9   r9   r9   r=   r    s   
 r  )r   r   ZsrtZverbose_jsonZvttAudioResponseFormatc                   @   s  e Zd ZU eed< 	 dZee ed< 	 dZee ed< 	 e	ddZ
eed< 	 e	ddZeed	< 	 e	d
g dZeed  ed< 	 dZee ed< 	 dZee ed< dZee ed< e	dddZeeeeeeef f  ed< e	ddZeed< 	 dZee ed< 	 dZee ed< 	 dZee ed< 	 e	dejejdZ ee ed< 	 dZ!ee ed< 	 dZ"ee ed< 	 dZ#ee ed< 	 dddd dd!Z$eed"< 	d,d#ed$ee d%e%fd&d'Z&e'd(d)e(d*d+ Z)dS )-TranscriptionRequestfileNrz   language r  r   r   r   ztimestamp_granularities[])r7   r   )wordsegmenttimestamp_granularitiesFr   stream_include_usagestream_continuous_usage_statsrE  r   rF  r  r   r   r  r  r  r  r  r  r
  r   r   rG  r   r   r   r   c           	      C   s   |}|d u ri }| j  }d u r|d| jd }| j }d u r(|d| jd }| j }d u r8|d| jd }| j }d u rH|d| jd }| j }d u rX|d| jd }tj||| j	|||| j
|| j| jrltjntj| jdS )Nr   r   r  r  r  )r   r   r  r   r  r  r  r  r
  r   rQ  )r   r   r   r   r  r  r  r+   r   r  r  r
  r   r*   r   r   rF  )	r   r   r   r   r   r   r  r  r  r9   r9   r=   r   V  sL   



z'TranscriptionRequest.to_sampling_paramsr   r5   c                    sX   t  dtrttjddddg} dd}t fdd	|D r*|s*td
 S )Nr  z4Expected 'file' to be a file-like object, not 'str'.)r  detailr  r  r   Fc                 3        | ]}t  |d V  qdS FNrx   r   r:   sorJ   r9   r=   r>         zFTranscriptionRequest.validate_transcription_request.<locals>.<genexpr>rd  )r?   r   rT   r	   r   UNPROCESSABLE_ENTITYrE   r   rI   rJ   Zstream_optsr   r9   r!  r=   validate_transcription_request  s   z3TranscriptionRequest.validate_transcription_requestr8   )*rP   rQ   rR   r
   rU   rz   r   rT   r  r   r   r   r  r  r   r   r   rx   r  r  rF  r@   r   r\   r   r   r   r  r  rv  r   rw  r  r  r  r
  r   r+   r   r   rV   r%  r9   r9   r9   r=   r    sp   
 $
+r  c                   @   r]   )TranscriptionResponser   Nr   r9   r9   r9   r=   r&       
 r&  c                   @   *   e Zd ZU eed< 	 eed< 	 eed< dS )TranscriptionWordr   startr  NrP   rQ   rR   r   rU   rT   r9   r9   r9   r=   r)       
 r)  c                   @   t   e Zd ZU eed< 	 eed< 	 eed< 	 eed< 	 eed< 	 eed< 	 eed< 	 eed< 	 eed	< 	 ee ed
< dS )TranscriptionSegmentrh   avg_logprobcompression_ratior   no_speech_probseekr*  r   r   r  NrP   rQ   rR   r\   rU   r   rT   r   r9   r9   r9   r=   r.    *   
 r.  c                   @   V   e Zd ZU eed< 	 eed< 	 eed< 	 dZeee  ed< 	 dZ	eee
  ed< dS )TranscriptionResponseVerbosedurationr  r   Nsegmentswords)rP   rQ   rR   rT   rU   r8  r   r   r.  r9  r)  r9   r9   r9   r=   r6       
 r6  c                   @   r  )TranslationResponseStreamChoicer  Nr  r  r  r9   r9   r9   r=   r;    r   r;  c                   @   r  )TranslationStreamResponsec                   C   rb   )Nztrsl-rc   r9   r9   r9   r=   rd     re   z"TranslationStreamResponse.<lambda>rf   rh   ztranslation.chunkri   c                   C   rj   r8   rk   r9   r9   r9   r=   rd     re   rm   rz   r  Nr  r  )rP   rQ   rR   r   rh   rT   rU   ri   r   rm   r\   r   r;  r  r   r   r9   r9   r9   r=   r<    r  r<  c                   @   s   e Zd ZU eed< 	 dZee ed< 	 eddZ	eed< 	 eddZ
eed< 	 ed	dZeed
< 	 dZee ed< 	 dZee ed< 	 dZee ed< dZee ed< d
diZeed< 	ddedee defddZeddedd ZdS )TranslationRequestr  Nrz   r  r  r   r   r   r  r   r  Fr   r  r  r   r   r   r   r   c                 C   sR   |}|d u ri }| j  }d u r|d| jd }tj||| jr$tjdS tjdS )Nr   )r   r   r   )	r   r   r   r+   r   r   r*   r   r   )r   r   r   r   r   r9   r9   r=   r   .	  s   
z%TranslationRequest.to_sampling_paramsr   r5   c                    s:   ddg}  dd}t fdd|D r|std S )Nr  r  r   Fc                 3   r  r  r  r  r!  r9   r=   r>   G	  r"  z=TranslationRequest.validate_stream_options.<locals>.<genexpr>rd  )r   rE   r   r$  r9   r!  r=   re  B	  s   z*TranslationRequest.validate_stream_optionsr8   )rP   rQ   rR   r
   rU   rz   r   rT   r   r   r   r  r   r   r  r   rx   r  r  r   r@   r\   r+   r   r   rV   re  r9   r9   r9   r=   r=    s<   
 

r=  c                   @   r]   )TranslationResponser   Nr   r9   r9   r9   r=   r>  O	  r'  r>  c                   @   r(  )TranslationWordr   r*  r  Nr+  r9   r9   r9   r=   r?  T	  r,  r?  c                   @   r-  )TranslationSegmentrh   r/  r0  r   r1  r2  r*  r   r   r  Nr3  r9   r9   r9   r=   r@  _	  r4  r@  c                   @   r5  )TranslationResponseVerboser7  r  r   Nr8  r9  )rP   rQ   rR   rT   rU   r8  r   r   r@  r9  r?  r9   r9   r9   r=   rA  	  r:  rA  )r   rl   httpr   typingr   r   r   r   r   r   rL  r   ZtorchZfastapir	   r
   Z'openai.types.chat.chat_completion_audior   r  Z)openai.types.chat.chat_completion_messager   r  Zopenai.types.responsesr   r   r   r   r   r   r   ImportErrorr   Zopenai.types.responses.responser   Zopenai.types.responses.toolr   Zopenai.types.sharedr   r   Zpydanticr   r   r   r   r   r   r   Ztyping_extensionsr    r{   r!   Zvllm.entrypoints.chat_utilsr"   r#   Zvllm.entrypoints.score_utilsr$   r%   Zvllm.loggerr&   Zvllm.pooling_paramsr'   Zvllm.sampling_paramsr(   r)   r*   r+   Zvllm.sequencer,   Z
vllm.utilsr-   r.   rP   rF   Ziinfolongrv  r/   rW   r^   ra   ry   r   r   r   r   r   r   r   r   ru  r   r   r   r   r   r   r   rT   rx  r   r   rU   r   r  ry  r  r  r  ZPoolingCompletionRequestZPoolingChatRequestZPoolingRequestr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  ZTokenizeRequestr  r
  r  r  r  r  r  r  r&  r)  r.  r6  r;  r<  r=  r>  r?  r@  rA  r9   r9   r9   r=   <module>   s"  
  $!		


     *  C(D					
		D&C
 ,*	a*