o
    )iG                     @   s&  d dl Z d dlmZ d dlmZmZmZmZ d dlm	Z	 d dl
mZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZ d dlmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ G dd dZ0dS )    N)Mapping)AnyLiteralOptionalUnion)
VllmConfig)ProcessorInputs
PromptTypeSingletonInputs)split_enc_dec_inputs)InputPreprocessor)LoRARequest)MULTIMODAL_REGISTRYMultiModalRegistry)MultiModalKwargsItemPlaceholderRange)EncDecMultiModalProcessor)argsort_mm_positions)PoolingParams)SamplingParams)TokenizerGroup)
is_list_of)EngineCoreRequest)MultiModalInputCacheClient)validate_guidance_grammar)+validate_structured_output_request_outlines)validate_xgrammar_grammarc                   @   sx  e Zd ZefdededefddZedd Z	de
d	d
fddZde
dee d	d
fddZde
d	d
fddZde
d	d
fddZdee
ef dee fddZdee d	d
fddZde
d	d
fddZ	
	
	
	
		
d,dededee
ef dee dee deeeef  deeeef  d ed!ee d	eee ef fd"d#Z	
d-d$e dee fd%d&Z!d'e"dee d(e#d) fd*d+Z$d
S ).	Processorvllm_config	tokenizermm_registryc                 C   s\   || _ |j| _|j| _|j| _|j| _|| _| j | _t| j| j|| _	t
| j|| _d S N)r   model_configcache_configlora_configdecoding_configr   Ztry_get_generation_configgeneration_config_fieldsr   input_preprocessorr   mm_input_cache_client)selfr   r   r     r*   d/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/v1/engine/processor.py__init__!   s   
zProcessor.__init__c                 C   s   | j jS r!   )r'   r    )r)   r*   r*   r+   r    8   s   zProcessor.mm_registryparamsreturnNc                 C   sr   | j j}|dkr
d S |jr"|jdks|j|kr"td|j d| |jr5|j|kr7td|j d| d S d S )NzRequested sample logprobs of z%, which is greater than max allowed: zRequested prompt logprobs of )r"   max_logprobsZlogprobs
ValueErrorZprompt_logprobs)r)   r-   r0   r*   r*   r+   _validate_logprobs<   s"   


zProcessor._validate_logprobslora_requestc                    sv   |  | | | |jd u rd S |jstd| jd u rd S | j|}t| t fdd|jD s9tdd S )Nz(allowed_token_ids is not None and empty!c                 3   s(    | ]}d |  ko k n  V  qdS )r   Nr*   ).0tid
vocab_sizer*   r+   	<genexpr>b   s   & z6Processor._validate_sampling_params.<locals>.<genexpr>z1allowed_token_ids contains out-of-vocab token id!)_validate_structured_output_validate_logit_biasZallowed_token_idsr1   r   get_lora_tokenizerlenall)r)   r-   r3   r   r*   r6   r+   _validate_sampling_paramsP   s   



z#Processor._validate_sampling_paramsc                 C   sZ   |j sdS | j }g }|j D ]}|dk s||kr|| q|r+td| d| dS )z:Validate logit_bias token IDs are within vocabulary range.Nr   ztoken_id(s) z@ in logit_bias contain out-of-vocab token ids. Vocabulary size: )Z
logit_biasr"   Zget_vocab_sizeappendr1   )r)   r-   r7   Zinvalid_token_idsZtoken_idr*   r*   r+   r:   f   s   


zProcessor._validate_logit_biasc                 C   s.   |j d ur|j dkrtd|jrtdd S )N   z%vLLM V1 does not yet support best_of.zEvLLM V1 does not support per request user provided logits processors.)Zbest_ofr1   Zlogits_processors)r)   r-   r*   r*   r+   #_validate_supported_sampling_paramsz   s
   z-Processor._validate_supported_sampling_paramsc                 C   s2   t |trdS | | | || | | dS )zr
        Validate supported SamplingParam.
        Should raise ValueError if unsupported for API Server.
        N)
isinstancer   r2   r>   rA   )r)   r-   r3   r*   r*   r+   _validate_params   s
   


zProcessor._validate_paramsc                 C   s&   |d ur| j std| dd S d S )NzGot lora_request z but LoRA is not enabled!)r$   r1   )r)   r3   r*   r*   r+   _validate_lora   s   zProcessor._validate_lorac                 C   s,  |j r| jsd S | jjr|j rtd| jj}|j jr7|j j|kr6|dkr)|j js6td|j j d| dn||j _t|j jt	rP|j jsPtd|j j d|
dr[t| d S |
d	rht|d d
 d S |dkrrt| d S z
t| d|j _W n ty   t|d d
 d	|j _Y nw d|j _d S )NzVStructured outputs requires a tokenizer so it can't be used with 'skip_tokenizer_init'autozaRequest-level structured output backend selection is no longer supported. The request specified 'z"', but vLLM was initialised with 'zM'. This error can be resolved by removing backend selection from the request.zChoice 'z' cannot be an empty listZxgrammarZguidancer   ZoutlinesT)Zguided_decodingr%   r"   skip_tokenizer_initr1   backendZbackend_was_autorB   choicelist
startswithr   r   r   )r)   r-   Zengine_level_backendr*   r*   r+   r9      sN   

z%Processor._validate_structured_outputr   
request_idpromptarrival_timetokenization_kwargstrace_headersprioritydata_parallel_rankc
                    s2  |  | | || |d urtd| jjj}
|	d ur2d|	  kr&|
k s2n td|	 d|
 d|d u r:t }| jjpCt	| j
j}| jj||||d}ddlm} |j|||d | j|}| || t|\}}|d urttd }d }t|tr| }|jd u r| jjt|d	  |_|| j| | jd ur|| j| n| }d }d }d }|d
 dkr|d |d | d t!}fdd|D }fdd|D } d u rd n fdd|D }|d ur| j"#||}n	t$|t%sJ |}| dt&||d	 ||||||||| d||	dfS )Nz V1 does not support tracing yet.r   zdata_parallel_rank z is out of range [0, z).)rO   r3   return_mm_hashes)current_platform)rM   r-   processed_inputsprompt_token_idstypeZ
multimodal	mm_kwargsmm_placeholders	mm_hashesc                    s   g | ]
\}}  ||qS r*   )Zget_itemr4   Zmodalityidx)decoder_mm_inputsr*   r+   
<listcomp>8      
z,Processor.process_inputs.<locals>.<listcomp>c                       g | ]
\}} | | qS r*   r*   r[   )decoder_mm_positionsr*   r+   r^   <  r_   c                    r`   r*   r*   r[   )decoder_mm_hashesr*   r+   r^   @  r_   rM   
cache_salt)rL   rV   rX   rZ   rY   sampling_paramspooling_paramseos_token_idrN   r3   rc   rQ   rR   )'rD   rC   r1   r   Zparallel_configdata_parallel_sizetimer"   Zprocessor_return_mm_hashesboolr#   Zenable_prefix_cachingr'   
preprocessZvllm.platformsrT   Zvalidate_requestZget_eos_token_id_validate_model_inputsr   NotImplementedErrorrB   r   cloneZ
max_tokensmax_model_lenr<   Zupdate_from_generation_configr&   r   Zupdate_from_tokenizerr;   getr   r(   Zget_and_updater   r   r   )r)   rL   rM   r-   rN   r3   rO   rP   rQ   rR   rg   rS   rU   rT   rf   encoder_inputsdecoder_inputsrd   re   Zsorted_mm_inputsZsorted_mm_positionsZsorted_mm_hashesZsorted_mm_idxsZorig_sorted_mm_inputsr*   )rb   r]   ra   r+   process_inputs   s   













zProcessor.process_inputsinputsc                 C   s8   t |\}}|d ur| j||dd | j||dd d S )Nencoder)prompt_typedecoder)r   _validate_model_input)r)   rs   r3   rp   rq   r*   r*   r+   rk   ^  s   
z Processor._validate_model_inputsprompt_inputsru   )rt   rv   c             	   C   s   | j }|d }|s|dkr|jrntd| d| j jr d }n| j|}t|dd}||jkr9td| d| j j}t	||kr{|dkra|jra| j
j}	|	j||d	}
t|
ts\J |
jrad S |jrgd
}nd}td| dt	| d| d| d S )NrV   rt   zThe z prompt cannot be emptyr   )defaultz	Token id z is out of vocabularyrF   zMake sure that `max_model_len` is no smaller than the number of text tokens plus multimodal tokens. For image inputs, the number of image tokens depends on the number of images, and possibly their aspect ratios as well.zLMake sure that `max_model_len` is no smaller than the number of text tokens.z prompt (length z-) is longer than the maximum model length of z. )r"   Zis_multimodal_modelr1   rG   r   r;   maxZmax_token_idrn   r<   r'   r    Zcreate_processorrB   r   Zpad_dummy_encoder_prompt)r)   rx   r3   ru   r"   Z
prompt_idsr   Zmax_input_idZmax_prompt_lenr    Zmm_processorZ
suggestionr*   r*   r+   rw   l  sL   

zProcessor._validate_model_input)NNNNr   Nr!   )%__name__
__module____qualname__r   r   r   r   r,   propertyr    r   r2   r   r   r>   r:   rA   r   r   rC   rD   r9   strr	   floatdictr   r   inttupler   rr   r   rk   r
   r   rw   r*   r*   r*   r+   r      s    







H
	

 
r   )1rh   collections.abcr   typingr   r   r   r   Zvllm.configr   Zvllm.inputsr   r	   r
   Zvllm.inputs.parser   Zvllm.inputs.preprocessr   Zvllm.lora.requestr   Zvllm.multimodalr   r   Zvllm.multimodal.inputsr   r   Zvllm.multimodal.processingr   Zvllm.multimodal.utilsr   Zvllm.pooling_paramsr   Zvllm.sampling_paramsr   Z'vllm.transformers_utils.tokenizer_groupr   Z
vllm.utilsr   Zvllm.v1.enginer   Zvllm.v1.engine.mm_input_cacher   Z*vllm.v1.structured_output.backend_guidancer   Z*vllm.v1.structured_output.backend_outlinesr   Z*vllm.v1.structured_output.backend_xgrammarr   r   r*   r*   r*   r+   <module>   s,   