o
    )i.                     @   s  d dl Z d dlmZmZ d dlmZmZmZmZm	Z	 d dl
mZmZ d dlmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dl m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z. ee/Z0G dd deZ1dS )    N)ABCabstractmethod)AsyncGeneratorIterableMappingOptionalUnion)BeamSearchSequencecreate_sort_beams_key_function)DecodingConfigModelConfig
VllmConfig)SchedulerOutputs)
PromptTypeTokensPrompt)"is_explicit_encoder_decoder_prompt)InputPreprocessor)init_logger)LoRARequest)SamplerOutput)CompletionOutputPoolingRequestOutputRequestOutput)PoolingParams)BeamSearchParamsSamplingParams)AnyTokenizer)Devicecollect_from_async_generatorrandom_uuidc                   @   s  e Zd ZdZeedefddZeedefddZeedefddZ	eede
fd	d
Ze			dHdedededee deeeef  dedeedf fddZ	dIdedededee deedf f
ddZe			dHdedededee deeeef  dedeedf fddZedeeee f ddfddZedefddZede fdd Z!ede"fd!d"Z#ede$fd#d$Z%e	dIdee de&fd%d&Z'edefd'd(Z(e		dJd)ee) d*ee*e+  ddfd+d,Z,edKd-d.Z-edKd/d0Z.edKd1d2Z/edKd3d4Z0e	dId5ee1 ddfd6d7Z2edLd9eddfd:d;Z3edId<ee*e  ddfd=d>Z4edefd?d@Z5ededdfdAdBZ6	CdMdDedEeddfdFdGZ7dS )NEngineClientz$Protocol class for Clients to Enginereturnc                 C      d S N selfr$   r$   `/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/engine/protocol.py
is_running      zEngineClient.is_runningc                 C   r"   r#   r$   r%   r$   r$   r'   
is_stopped"   r)   zEngineClient.is_stoppedc                 C   r"   r#   r$   r%   r$   r$   r'   errored'   r)   zEngineClient.erroredc                 C   r"   r#   r$   r%   r$   r$   r'   
dead_error,   r)   zEngineClient.dead_errorNr   promptsampling_params
request_idlora_requesttrace_headerspriorityc                 C      dS )zGenerate outputs for a request.Nr$   )r&   r-   r.   r/   r0   r1   r2   r$   r$   r'   generate1      zEngineClient.generateparamsc           -        s  |j }|j}|j}|j}|j}	|j}
|  I d H }| }| I d H }t	|r+t
||}|d dkr8t
|d}|d}|d}|d}t| t|j|	}td| d|d	}t|d
g |||dg}g }t|D ]}tdd |D  \}}g }dt  }tt||D ] \}\}}| d| }tt| j||||d}|| qtj| I d H } dd | D } g }!t|D ]f\}}"| | }#|#jd
 jd ur(|#jd
 jd
 }$|$ D ]H\}%}&|%|jkr|s|t|
r|"j|%g n|"j|"j|$g |"j |&j! d|jd q|!t|"j|%g |"j|$g |"j"|"j |&j! |"j#|"j$d qqt%|!|dd}'|'d | }qp|&| t%||dd}(|(d | })|)D ]$}*|*jd |jkra|sa|*j d }+n|*j d  }+|'|+|*_(qKt)|| fddt|)D d|d d},|,V  d S )NtypeZembedsprompt_token_idsmulti_modal_datar-   mm_processor_kwargs      )logprobs
max_tokenstemperaturer   )tokenscum_logprobr=   r9   r:   r0   c                 S   s&   g | ]}t |j|j|jd |jfqS ))r8   r9   r:   )r   r@   r9   r:   r0   ).0beamr$   r$   r'   
<listcomp>|   s    z,EngineClient.beam_search.<locals>.<listcomp>zbeam_search--)r0   c                 S   s   g | ]}|d  qS )r   r$   )rB   xr$   r$   r'   rD      s    stop)r@   r=   rA   finish_reasonstop_reason)r@   r=   r0   rA   r9   r:   T)keyreversec                    sH   g | ] \}}t |j|j|j d  ||j|jd ur|jnd|jdqS )Nlength)textZcumulative_logprobZ	token_idsindexr=   rH   rI   )r   rN   rA   r@   r=   rH   rI   )rB   irC   Ztokenized_lengthr$   r'   rD      s    	
)r/   r-   outputsfinishedr8   Zprompt_logprobs)*
beam_widthr>   
ignore_eosr?   length_penaltyinclude_stop_str_in_outputget_input_preprocessorZget_tokenizer_groupZget_lora_tokenizer_asyncr   NotImplementedErrorZ_prompt_to_llm_inputsgetlenr
   Zeos_token_idr   r	   rangezipr   	enumerateasynciocreate_taskr   r4   appendgatherrR   r=   itemsr@   rA   Zlogprobr0   r9   r:   sortedextenddecoderN   r   )-r&   r-   r/   r6   r0   rT   r>   rU   r?   rV   rW   ZpreprocessorZtokenizer_groupZ	tokenizerZprocessed_inputsr8   r9   Zprompt_textr:   Zsort_beams_keyZbeam_search_paramsZ	all_beams	completed_Zprompts_batchZlora_req_batchtasksrP   Zindividual_promptZlora_reqZrequest_id_itemtaskoutputZ	new_beamsZcurrent_beamresultr=   Ztoken_idZlogprob_objZsorted_beamsZsorted_completedZ
best_beamsrC   r@   Zbeam_search_outputr$   rQ   r'   beam_search>   s   








	
zEngineClient.beam_searchpooling_paramsc                 C   r3   )z4Generate outputs for a request from a pooling model.Nr$   )r&   r-   rn   r/   r0   r1   r2   r$   r$   r'   encode   r5   zEngineClient.encodec                       dS )zAbort a request.

        Args:
            request_id: The unique id of the request,
                        or an iterable of such ids.
        Nr$   )r&   r/   r$   r$   r'   abort   s   zEngineClient.abortc                    rp   )z.Get the vllm configuration of the vLLM engine.Nr$   r%   r$   r$   r'   get_vllm_config      zEngineClient.get_vllm_configc                    rp   )z/Get the model configuration of the vLLM engine.Nr$   r%   r$   r$   r'   get_model_config   rs   zEngineClient.get_model_configc                    rp   )z2Get the decoding configuration of the vLLM engine.Nr$   r%   r$   r$   r'   get_decoding_config   rs   z EngineClient.get_decoding_configc                    rp   )z+Get the input processor of the vLLM engine.Nr$   r%   r$   r$   r'   rX      rs   z#EngineClient.get_input_preprocessorc                    rp   )z-Get the appropriate tokenizer for the requestNr$   r&   r0   r$   r$   r'   get_tokenizer     zEngineClient.get_tokenizerc                       d S r#   r$   r%   r$   r$   r'   is_tracing_enabled  s   zEngineClient.is_tracing_enabledscheduler_outputsmodel_outputc                    ry   r#   r$   )r&   r{   r|   r$   r$   r'   do_log_stats  rx   zEngineClient.do_log_statsc                    rp   )zRaise if unhealthyNr$   r%   r$   r$   r'   check_health  rs   zEngineClient.check_healthc                    rp   zStart profiling the engineNr$   r%   r$   r$   r'   start_profile  rs   zEngineClient.start_profilec                    rp   r   r$   r%   r$   r$   r'   stop_profile#  rs   zEngineClient.stop_profilec                    rp   )zReset the multi-modal cacheNr$   r%   r$   r$   r'   reset_mm_cache(  rs   zEngineClient.reset_mm_cachedevicec                    rp   )zReset the prefix cacheNr$   )r&   r   r$   r$   r'   reset_prefix_cache-     zEngineClient.reset_prefix_cacher<   levelc                    rp   )zSleep the engineNr$   )r&   r   r$   r$   r'   sleep3  rs   zEngineClient.sleeptagsc                    rp   )zWake up the engineNr$   )r&   r   r$   r$   r'   wake_up8  rs   zEngineClient.wake_upc                    rp   )z$Check whether the engine is sleepingNr$   r%   r$   r$   r'   is_sleeping=  rs   zEngineClient.is_sleepingc                    rp   )z<Load a new LoRA adapter into the engine for future requests.Nr$   rv   r$   r$   r'   add_loraB  rs   zEngineClient.add_lora,  new_data_parallel_sizedrain_timeoutc                    s   t )zScale the engine)rY   )r&   r   r   r$   r$   r'   scale_elastic_epG  r   zEngineClient.scale_elastic_ep)NNr   r#   )NN)r!   N)r<   )r   )8__name__
__module____qualname____doc__propertyr   boolr(   r*   r+   BaseExceptionr,   r   r   strr   r   r   intr   r   r4   r   rm   r   r   ro   r   r   rq   r   rr   r   rt   r   ru   r   rX   r   rw   rz   r   listr   r}   r~   r   r   r   r   r   r   r   r   r   r   r$   r$   r$   r'   r       s    


 
 	
r    )2r_   abcr   r   typingr   r   r   r   r   Zvllm.beam_searchr	   r
   Zvllm.configr   r   r   Zvllm.core.schedulerr   Zvllm.inputs.datar   r   Zvllm.inputs.parser   Zvllm.inputs.preprocessr   Zvllm.loggerr   Zvllm.lora.requestr   Z"vllm.model_executor.layers.samplerr   Zvllm.outputsr   r   r   Zvllm.pooling_paramsr   Zvllm.sampling_paramsr   r   Z!vllm.transformers_utils.tokenizerr   Z
vllm.utilsr   r   r   r   loggerr    r$   r$   r$   r'   <module>   s&   