o
    )il                     @   sp  d dl Z d dlZd dlmZmZ d dlmZ d dlmZm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZmZmZmZmZmZm Z  d dl!m"Z# d dl!m$Z$m%Z%m&Z&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m"Z"m-Z-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z< e1e=Z>G dd de$Z?dS )    N)AsyncGeneratorAsyncIterator)Sequence)OptionalUnioncast)Request)assert_never)ModelConfig)EngineClient)RequestLogger)
CompletionLogProbsCompletionRequestCompletionResponseCompletionResponseChoiceCompletionResponseStreamChoiceCompletionStreamResponseErrorResponsePromptTokenUsageInfoRequestResponseMetadata	UsageInfo)EmbedsPrompt)OpenAIServingTextTokensPromptclamp_prompt_logprobsis_text_tokens_prompt)OpenAIServingModels)get_max_tokens)r   TokensPromptis_embeds_promptis_tokens_prompt)init_logger)RequestOutput)BeamSearchParamsSamplingParams)Logprob)AnyTokenizer)merge_async_iteratorsc                       s@  e Zd Zdddddedededee deded	ef fd
dZ		d'de
dee deeedf eef fddZde
deeeef  deeeef  dedededededed	edeedf fddZdee de
dedededededefddZ		d(d ee d!eeeeef   d"eded#ed$ee de fd%d&Z!  Z"S ))OpenAIServingCompletionF)return_tokens_as_token_idsenable_prompt_tokens_detailsenable_force_include_usageengine_clientmodel_configmodelsrequest_loggerr)   r*   r+   c          	         s`   t  j||||||d || _| j | _| jr.| jj}|dkr"dn|}td|| j d S d S )N)r,   r-   r.   r/   r)   r+   automodelz4Using default completion sampling params from %s: %s)	super__init__r*   r-   Zget_diff_sampling_paramdefault_sampling_paramsZgeneration_configloggerinfo)	selfr,   r-   r.   r/   r)   r*   r+   source	__class__ v/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/openai/serving_completion.pyr3   4   s(   z OpenAIServingCompletion.__init__Nrequestraw_requestreturnc                     s  |  |I dH }|dur|S | jjr| jj|jdur!| dS |jr.|jdur.| dS d| ||j	 }t
t }t|d}|rI||j_z"| |}| j|I dH }| j|||j|j|jdI dH \}	}
W nv ty } ztd | t|W  Y d}~S d}~w ty } ztd | t|W  Y d}~S d}~w ty } ztd | t|W  Y d}~S d}~w tjy } ztd | t|W  Y d}~S d}~ww g }zt|
D ]\}}ttt t!f |}t"|rt#|d }nt$|rt#|d	 }nt%| | j&du ri | _&t'| j(||| j&d
}|j)r1|*|| j&}n
|+|| j,j-| j&}| d| }| j.||	| ||d |du rTdn| /|j0I dH }ttt t!f |}t1|t2rw| jj3||||d}n| jj4||||||j5d}|6| qW n ty } z| t|W  Y d}~S d}~ww t7| }| 8|j9|}t#|
}|j:o|j;du p|j<|j;ko|j) }|r| j=||	|||||||| j>d
S dg| }zO|2 z3 dH W \}}|||< q6 t|D ]%\}}|dusJ |jdu r|	| }t?|r|d |_qd|_qtt@tA |}| B|||||||}W n) tCjDyA   | d Y S  tyZ } z| t|W  Y d}~S d}~ww |j:rs|E  dtFtdf f fdd}| S |S )aq  Completion API similar to OpenAI's API.

        See https://platform.openai.com/docs/api-reference/completions/create
        for the API specification. This API mimics the OpenAI Completion API.

        NOTE: Currently we do not support the following feature:
            - suffix (the language models we currently support do not support
            suffix)
        Nz!suffix is not currently supportedz'Echo is unsupported with prompt embeds.zcmpl-)
request_id)truncate_prompt_tokensadd_special_tokensz$Error in preprocessing prompt inputsprompt_embedsprompt_token_ids)max_model_lenr=   input_lengthr4   -)paramslora_request)promptr@   rH   rI   )rI   trace_headerspriority)num_prompts	tokenizerrequest_metadatar+   rJ   zClient disconnectedr?   c                     s   d  dV  dV  d S )Ndata: 

data: [DONE]

r;   r;   response_jsonr;   r<   fake_stream_generator,  s   
zHOpenAIServingCompletion.create_completion.<locals>.fake_stream_generator)GZ_check_modelr,   ZerroredZ
dead_errorsuffixZcreate_error_responseechorC   Z_base_request_idr@   inttimer   staterO   Z_maybe_get_adaptersZget_tokenizerZ_preprocess_completionrJ   rA   rB   
ValueErrorr5   	exceptionstr	TypeErrorRuntimeErrorjinja2ZTemplateError	enumerater   r   r   r   r   lenr    r	   r4   r   rE   Zuse_beam_searchZto_beam_search_paramsZto_sampling_paramsr-   Zlogits_processor_patternZ_log_inputsZ_get_trace_headersheaders
isinstancer#   Zbeam_searchgeneraterL   appendr'   Z_get_model_namer1   streamZbest_ofncompletion_stream_generatorr+   r   listr"   %request_output_to_completion_responseasyncioCancelledErrormodel_dump_jsonr   ) r7   r=   r>   Zerror_check_retr@   created_timerO   rI   rN   request_promptsZengine_promptse
generatorsiZengine_promptrF   
max_tokensZsampling_paramsZrequest_id_itemrK   	generatorresult_generator
model_namerM   rg   final_res_batchres	final_resrequest_promptZfinal_res_batch_checkedresponserU   r;   rS   r<   create_completionS   sP  










	J

	z)OpenAIServingCompletion.create_completionrp   rv   r@   ro   rw   rM   rN   rO   c           .      C  sX  |j d u rdn|j }dg| | }dg| | }dg| | }dg| }d }d}|j}|r9|jp2|
}|o7|j}nd\}}zG|2 z3 d H W \}}|j}|j}|rW|j}d}|jd ur`|j}n|| }t|rm|d }nd }|d uryt	|||< |j
D ]}|j||  }|jd usJ |jr|| s|d usJ |d usJ |jdkr|}|}|}n||j }g ||j}g |pg |jpg }d||< n|j}|j}|j}|s|s|| sq||jd ur|d usJ d| j|||j||| |jd} nd } ||  t	|j7  < ||  t	|j7  < |j}!|j}"t|||t||| |!|"d	gd
}#|r9|| }$|| }%t|$|%|$|% d|#_|#jdd}&d|& dV  q|qA6 t|}'t|}(t|'|(|'|( d})| jrg|rgt|d|)_|rt|||g |)d}*|*jddd}+d|+ dV  |)|	_W n! ty }, z|  t!|,}-d|- dV  W Y d },~,nd },~,ww dV  d S )N   r   FT)FFrJ   Did not output logprobs)	token_idstop_logprobsnum_output_top_logprobsrN   initial_text_offsetreturn_as_token_id)indextextlogprobsfinish_reasonstop_reason)idcreatedr1   choicesprompt_tokenscompletion_tokensZtotal_tokens)exclude_unsetrP   rQ   Zcached_tokens)r   r   r1   r   usage)r   Zexclude_nonerR   )"rh   stream_optionsinclude_usageZcontinuous_usage_statsrD   prompt_logprobsnum_cached_tokensrJ   r   rb   outputsr   rt   rW   r   r   r   _create_completion_logprobsr)   r   r   r   r   r   r   rn   sumr*   r   prompt_tokens_detailsfinal_usage_info	ExceptionZcreate_streaming_error_responser]   ).r7   r=   rp   rv   r@   ro   rw   rM   rN   rO   r+   Znum_choicesZprevious_text_lensZprevious_num_tokensZ
has_echoednum_prompt_tokensr   Zfirst_iterationr   r   Zinclude_continuous_usageZ
prompt_idxry   rD   r   prompt_textr{   outputrs   Z
delta_textZdelta_token_idsout_logprobsr   r   r   chunkr   r   rT   Ztotal_prompt_tokensZtotal_completion_tokensr   Zfinal_usage_chunkZfinal_usage_datarq   datar;   r;   r<   ri   4  s  









j

z3OpenAIServingCompletion.completion_stream_generatorrx   c              
   C   s  g }d}	d}
d }d }|D ]}|}|j }|d usJ t|j}|j}|jD ]}|jd us-J |jrk|d us6J |jdkrB|}|}|}n2g ||j}|jd u rQd }n|d usWJ |jd us^J g ||j}||j	 }n	|j}|j}|j	}|jd ur|d usJ d| j
||||j|jd}nd }tt||||j|j|jd}|| |
t|j7 }
q$|	t|7 }	qt|	|
|	|
 d}| jr|r|jrt|jd|_||_|r|d j}t||||||dS )Nr   r   )r   r   rN   r   r   )r   r   r   r   r   r   r   r   )r   r   r1   r   r   kv_transfer_params)rD   r   r   rJ   r   rt   rW   r   r   r   r   r)   r   rb   r   r   rf   r   r*   r   r   r   r   r   r   )r7   rx   r=   r@   ro   rw   rN   rO   r   r   Znum_generated_tokensr   Zlast_final_resrz   rD   r   r   r   r   r   Zoutput_textr   Zchoice_datar   r;   r;   r<   rk     s   








z=OpenAIServingCompletion.request_output_to_completion_responser   r   r   r   r   r   c                    s"  g }g }g }	g }
d}|dur|nj t|D ]q\}}|| }|du r?|}r/d| }|	| |d |
d n0|| }j||d}t|jd}|	| || |
 fddt| D  t|dkr{|| n	||d |  t|}qt	|||	|
d	S )
z*Create logprobs for OpenAI Completion API.r   Nz	token_id:r       c                    s@   i | ]\}} |krj |d  |d dt|d  jdqS )r~   r   r   r   )_get_decoded_tokenmaxlogprob).0rs   Ztop_lpr   r7   Zshould_return_as_token_idrN   r;   r<   
<dictcomp>u  s    
zGOpenAIServingCompletion._create_completion_logprobs.<locals>.<dictcomp>)Ztext_offsetZtoken_logprobstokensr   )
r)   ra   decoderf   r   r   r   itemsrb   r   )r7   r   r   r   rN   r   r   Zout_text_offsetZout_token_logprobsZ
out_tokensZout_top_logprobsZlast_token_lenrs   Ztoken_idZstep_top_logprobstokenZ
step_tokenZtoken_logprobr;   r   r<   r   E  sR   









z3OpenAIServingCompletion._create_completion_logprobs)N)r   N)#__name__
__module____qualname__r   r
   r   r   r   boolr3   r   r   r   r   r]   r   r   r}   rj   r   ServingEngineEmbedsPromptr   tuplerX   r"   r&   r   ri   rk   GenericSequencedictr%   r   r   __classcell__r;   r;   r9   r<   r(   2   s    		"
 b	


 ,	
lr(   )@rl   rY   collections.abcr   r   r   r   typingr   r   r   r`   Zfastapir   Ztyping_extensionsr	   Zvllm.configr
   Zvllm.engine.protocolr   Zvllm.entrypoints.loggerr   Z vllm.entrypoints.openai.protocolr   r   r   r   r   r   r   r   r   r   Z&vllm.entrypoints.openai.serving_enginer   r   r   r   r   r   Z&vllm.entrypoints.openai.serving_modelsr   Zvllm.entrypoints.utilsr   Zvllm.inputs.datar   r   r    Zvllm.loggerr!   Zvllm.outputsr"   Zvllm.sampling_paramsr#   r$   Zvllm.sequencer%   Z!vllm.transformers_utils.tokenizerr&   Z
vllm.utilsr'   r   r5   r(   r;   r;   r;   r<   <module>   s2   0
