o
    )iτ                     @   s   d dl Z d dlmZ d dlmZmZmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZmZ d d
lmZ d dlmZ ddlmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( ddl)m*Z*m+Z+ ee,Z-G dd dZ.dS )    N)Mapping)AnyOptionalUnioncast)assert_never)ModelConfig)init_logger)LoRARequest)MULTIMODAL_REGISTRYMultiModalRegistry)MultiModalDataDictMultiModalEncDecInputsMultiModalInputs)AnyTokenizer)TokenizerGroup   )DecoderOnlyInputsEmbedsInputsEmbedsPromptEncoderDecoderInputsProcessorInputs
PromptTypeSingletonInputsSingletonPrompt
TextPromptTokenInputsTokensPromptembeds_inputstoken_inputs)"is_explicit_encoder_decoder_promptparse_singleton_promptc                       s.  e Zd Zefdedee deddf fddZdefdd	Z		dSd
ee
 dee fddZ	dSd
ee
 dee fddZdee fddZdee fddZdeee  dee fddZ	dSdeeeef  deeef fddZ	dSded
ee
 deeeef  dee fddZ	dSded
ee
 deeeef  dee fddZd
ee
 defdd Zd
ee
 defd!d"Z			#dTdeeee f d$ed%eeeef  deeeef  d
ee
 d&ede fd'd(Z!			#dTdeeee f d$ed%eeeef  deeeef  d
ee
 d&ede fd)d*Z"d+e#de$fd,d-Z%d+e#de$fd.d/Z&			#dTd+e'deeeef  d
ee
 d&edee(e f f
d0d1Z)			#dTd+e'deeeef  d
ee
 d&edee(e f f
d2d3Z*			#dTd+e+deeeef  d
ee
 d&edee(e f f
d4d5Z,			#dTd+e+deeeef  d
ee
 d&edee(e f f
d6d7Z-			#dTde.deeeef  d
ee
 d&ede/f
d8d9Z0			#dTde.deeeef  d
ee
 d&ede/f
d:d;Z1d<e/d=ee/ de2fd>d?Z3	dSd@ee/e4f dAee/ de5e/e/f fdBdCZ6	dSde7deeeef  de2fdDdEZ8	dSde7deeeef  de2fdFdGZ9dHe:de:fdIdJZ;			#dTde.deeeef  d
ee
 d&ede:f
dKdLZ<			#dTde.deeeef  d
ee
 d&ede:f
dMdNZ=			#dTde7deeeef  d
ee
 d&ede>f
dOdPZ?			#dTde7deeeef  d
ee
 d&ede>f
dQdRZ@  ZAS )UInputPreprocessormodel_config	tokenizermm_registryreturnNc                    s    t    || _|| _|| _d S N)super__init__r#   r$   r%   )selfr#   r$   r%   	__class__ b/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/inputs/preprocess.pyr)      s   

zInputPreprocessor.__init__c                 C   s   | j d u r	td| j S )Nz?You cannot pass text prompts when `skip_tokenizer_init` is True)r$   
ValueError)r*   r-   r-   r.   get_tokenizer_group*   s   
z%InputPreprocessor.get_tokenizer_grouplora_requestc                 C   &   | j d u rtd d S | j |jS )Nz@Using None for BOS token id because tokenizer is not initialized)r$   loggerwarningget_lora_tokenizerbos_token_idr*   r1   r-   r-   r.   get_bos_token_id1      

z"InputPreprocessor.get_bos_token_idc                 C   r2   )Nz@Using None for EOS token id because tokenizer is not initialized)r$   r3   r4   r5   Zeos_token_idr7   r-   r-   r.   get_eos_token_id;   r9   z"InputPreprocessor.get_eos_token_idc                 C   sh   | j jstd dS | j du s| j jdu rtd dS t| j jdd}|du r2td |  }|S )z
        Obtain the decoder start token id employed by an encoder/decoder
        model. Returns None for non-encoder/decoder models or if the
        model config is unavailable.
        zSUsing None for decoder start token id because this is not an encoder/decoder model.NzLUsing None for decoder start token id because model config is not available.decoder_start_token_idzaFalling back on <BOS> for decoder start token id because decoder start token id is not available.)r#   is_encoder_decoderr3   Zwarning_once	hf_configgetattrr8   )r*   Zdec_start_token_idr-   r-   r.   get_decoder_start_token_idE   s&   z,InputPreprocessor.get_decoder_start_token_idc                 C   s   |   }|dus
J |gS )aU  
        Specifically for encoder/decoder models:
        generate a default decoder prompt for when
        the user specifies only the encoder prompt.

        Encoder/decoder models utilize the decoder
        prompt in different ways; as new models are
        added, it is intended that this function
        will be extended to produce differing
        default decoder prompts, depending on the
        model variety.

        Absent a special case, the default behavior
        of this method is to mirror the behavior of
        the HuggingFace (HF) GenerationMixin for a None
        decoder prompt, which is to employ a logit processor
        setting to force the first decoded token to be <BOS>.
        Here, this behavior is approximated by having the
        "default" decoder prompt be <BOS>.

        However, it is possible that in the future
        other models may have different or more
        complex logic for the default decoder prompt.
        This motivates having a special helper method
        for default decoder prompts.

        Returns:

        * prompt_token_ids
        N)r8   )r*   r6   r-   r-   r.   #_get_default_enc_dec_decoder_promptc   s    z5InputPreprocessor._get_default_enc_dec_decoder_promptdecoder_input_idsc                 C   sJ   |   }|dus
J |du r|  }t|dks|d |kr#|g| }|S )a  
        Prepares `decoder_input_ids` for generation with encoder-decoder models.

        Based on:
        https://github.com/huggingface/transformers/blob/4037a2b5b1278736e566aec12e169100275545ea/src/transformers/generation/utils.py
        specifically,
        `GenerationMixin._prepare_decoder_input_ids_for_generation()`.

        Arguments:

        * decoder_input_ids: input token ids to preprocess

        Returns:

        * Processed token list
        Nr   )r?   r@   len)r*   rA   r;   r-   r-   r.   )_prepare_decoder_input_ids_for_generation   s   
z;InputPreprocessor._prepare_decoder_input_ids_for_generation	overridesc                 C   s6   t ttf  }| jjjdkrd|d< |r|| |S )NwhisperFZadd_special_tokens)dictstrr   r#   r=   
model_typeupdate)r*   rD   kwargsr-   r-   r.   _get_tokenization_kw   s   
z&InputPreprocessor._get_tokenization_kwprompttokenization_kwargsc                 C   sH   |   }| |}| jj}|r|ddr| }|jd||d|S )zn
        Apply the model's tokenizer to a text prompt, returning the
        corresponding token IDs.
        Zdo_lower_caseFrL   r1   Nr-   )r0   rK   r#   encoder_configgetlowerencode)r*   rL   r1   rM   r$   rO   r-   r-   r.   _tokenize_prompt   s   

z"InputPreprocessor._tokenize_promptc                    s0   |   }| |}|jd||d|I dH S )z{
        Async version of
        [`_tokenize_prompt`][vllm.inputs.preprocess.InputPreprocessor._tokenize_prompt].
        rN   Nr-   )r0   rK   Zencode_async)r*   rL   r1   rM   r$   r-   r-   r.   _tokenize_prompt_async   s   

z(InputPreprocessor._tokenize_prompt_asyncc                 C   s$   | j s	ttt S |  }||S r'   )r$   r   r   objectr0   r5   r*   r1   Ztokenizer_groupr-   r-   r.   _get_mm_tokenizer   s   
z#InputPreprocessor._get_mm_tokenizerc                    s,   | j s
ttt S |  }||I d H S r'   )r$   r   r   rU   r0   Zget_lora_tokenizer_asyncrV   r-   r-   r.   _get_mm_tokenizer_async   s
   z)InputPreprocessor._get_mm_tokenizer_asyncFmm_datamm_processor_kwargsreturn_mm_hashesc           	      C   s<   |  |}| jj| j|d}|du ri }|j|||||dS )z
        Apply the model's multi-modal processor to a multi-modal prompt,
        returning the corresponding token IDs and metadata.
        r$   NZhf_processor_mm_kwargsrM   r[   )rW   r%   create_processorr#   apply	r*   rL   rY   rZ   rM   r1   r[   r$   Zmm_processorr-   r-   r.   _process_multimodal   s   

z%InputPreprocessor._process_multimodalc           	         sD   |  |I dH }| jj| j|d}|du ri }|j|||||dS )z
        Async version of
        [`_process_multimodal`][vllm.inputs.preprocess.InputPreprocessor._process_multimodal].
        Nr\   r]   )rX   r%   r^   r#   r_   r`   r-   r-   r.   _process_multimodal_async  s   
z+InputPreprocessor._process_multimodal_asyncparsed_contentc                 C   sR   | j jstd|d }|jdkr|jdd}|jdkr tdt||dd	S )
Nz?You must set `--enable-prompt-embeds` to input `prompt_embeds`.prompt_embeds   r   )dim   z6prompt_embeds must be of shape (seq_len, hidden_size).
cache_salt)rd   rh   )r#   Zenable_prompt_embedsr/   ndimZsqueezer   rP   )r*   rc   rd   r-   r-   r.   _process_embeds/  s   

z!InputPreprocessor._process_embedsc                    s   |  |S r'   )rj   )r*   rc   r-   r-   r.   _process_embeds_asyncG  s   
z'InputPreprocessor._process_embeds_asyncc           
      C   sd   |d }| d}| d }r| j||| d|||d}nt||d}| d }	r0|	|d< |S Nprompt_token_idstoken_type_idsmulti_modal_datarZ   rM   r1   r[   )rm   rn   rh   )rP   ra   r   
r*   rc   rM   r1   r[   rm   rn   ro   inputsrh   r-   r-   r.   _process_tokensM  s$   
	z!InputPreprocessor._process_tokensc           
         sl   |d }| d}| d }r#| j||| d|||dI d H }nt||d}| d }	r4|	|d< |S rl   )rP   rb   r   rq   r-   r-   r.   _process_tokens_asyncl  s&   
	z'InputPreprocessor._process_tokens_asyncc           
      C   sj   |d }| d }r| j||| d|||d}n| j|||d}t||d}| d }	r3|	|d< |S NrL   ro   rZ   rp   )r1   rM   rL   rm   rh   )rP   ra   rS   r   
r*   rc   rM   r1   r[   Zprompt_textro   rr   rm   rh   r-   r-   r.   _process_text  s,   	zInputPreprocessor._process_textc           
         sx   |d }| d }r| j||| d|||dI d H }n| j|||dI d H }t||d}| d }	r:|	|d< |S ru   )rP   rb   rT   r   rw   r-   r-   r.   _process_text_async  s.   	z%InputPreprocessor._process_text_asyncc                 C   s   t |}|d dkr| |d S |d dkr!| j|d ||dS |d dkr2| j|d |||dS |d dkrF| jt|d d	|||dS t| d
S )af  
        Extract the singleton inputs from a prompt.

        Arguments:

        * prompt: single encoder or decoder input prompt
        * lora_request: this is only valid for decoder prompts
        * return_mm_hashes: whether to return multimodal hashes

        Returns:

        * [`SingletonInputs`][vllm.inputs.data.SingletonInputs] instance
        typeembedscontenttokensr1   r[   textrp   rG   rL   N)r!   rj   rs   rx   r   r   r*   rL   rM   r1   r[   parsedr-   r-   r.   _prompt_to_llm_inputs  s0   z'InputPreprocessor._prompt_to_llm_inputsc                    s   t |}|d dkr| |d I dH S |d dkr(| j|d ||dI dH S |d dkr<| j|d |||dI dH S |d d	krS| jt|d d
|||dI dH S t| dS )z
        Async version of
        [`_prompt_to_llm_inputs`][vllm.inputs.preprocess.InputPreprocessor._prompt_to_llm_inputs].
        rz   r{   r|   Nr}   r~   r   rp   rG   r   )r!   rk   rt   ry   r   r   r   r-   r-   r.   _prompt_to_llm_inputs_async   s2   z-InputPreprocessor._prompt_to_llm_inputs_asyncencoder_inputsdecoder_inputsc                 C   s   |d dks|r|d dkrt dttttf |}tttttf  |}|d u rB| jjjdkr8|d 	 }n| 
d }t|}nd|v rJt d| 
|d }||d< t||dS )	Nrz   r{   =Embedding inputs are not supported for encoder-decoder modelsrE   rm   ro   zJMulti-modal decoder inputs of encoder-decoder models are not supported yet)encoderdecoder)r/   r   r   r   r   r   r#   r=   rH   copyrC   r   r   )r*   r   r   Zdec_token_idsr-   r-   r.   _build_enc_dec_llm_inputs&  s8   
z+InputPreprocessor._build_enc_dec_llm_inputsrr   decoder_inputs_to_overridec                 C   s  |d dks|r|d dkrt dtttttf |}tttttf  |}|d dkrqd|v r5d|v s9tdtt|}t|d |d d}|pK|}td|	d	d
|d |d |d |d d}|	d }rm||d< ||fS |d dkrtd
g d}|p|}||fS t
| ||fS )zx
        For encoder/decoder models only:
        Separate Encoder/Decoder inputs from a MultiModalEncDecInputs
        rz   r{   r   Z
multimodalencoder_promptZencoder_prompt_token_idszXYou should register an encoder-decoder multi-modal processor for encoder-decoder models.rv   rL    rm   	mm_kwargs	mm_hashesmm_placeholders)rz   rL   rm   r   r   r   rh   token)r/   r   r   r   r   r   r   RuntimeErrorr   rP   r   )r*   rr   r   r   Zdecoder_prompt_inputsr   rh   r-   r-   r.   _split_enc_dec_mm_inputsO  sL   	

z*InputPreprocessor._split_enc_dec_mm_inputsc                 C   s   t |r*| j|d |d}|d  }du rd}n| |}| jjr)| ||\}}n| j||d}| jjr=| |\}}n|}d}| ||S )ab  
        For encoder/decoder models only:
        Process an input prompt into an
        [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
        instance.

        There are two types of input prompts:
        singleton prompts which carry only the
        encoder prompt, and explicit encoder/decoder
        prompts which carry both the encoder and the
        decoder prompts as member variables.

        This function handles the following scenarios:
        * Singleton encoder prompt: extract encoder prompt
          token ids & infer default decoder prompt token ids
        * Explicit encoder/decoder prompt: extract encoder
          and decoder prompt token ids

        Note that for Explicit encoder/decoder prompts,
        each sub-prompt (encoder or decoder prompt) can
        have any possible singleton type; thus this
        method relies on helper functions to obtain
        token ids for the sub-prompts.

        Arguments:

        * prompt: an input prompt

        Returns:

        * [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs]
          instance
        r   rM   decoder_promptN)r    r   r#   is_multimodal_modelr   r   )r*   rL   rM   r   decoder_inputr   rr   r-   r-   r.   _process_encoder_decoder_prompt  s0   )
z1InputPreprocessor._process_encoder_decoder_promptc           	         s   t |r=| j|d |d}|d  }du r|I dH }d}n| j||d}t||I dH \}}| jjr<| ||\}}n| j||dI dH }| jjrS| |\}}n|}d}| ||S )z
        Async version of
        [`_process_encoder_decoder_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_encoder_decoder_prompt].
        r   r   r   N)r    r   asynciogatherr#   r   r   r   )	r*   rL   rM   Zencoder_taskr   r   r   Zdecoder_taskrr   r-   r-   r.   %_process_encoder_decoder_prompt_async  s@   
z7InputPreprocessor._process_encoder_decoder_prompt_asyncprompt_inputsc                 C   s   d|v rt tttf |}|S )Nrm   )r   r   r   r   )r*   r   r-   r-   r.   _build_decoder_only_llm_inputs  s
   z0InputPreprocessor._build_decoder_only_llm_inputsc                 C   s   | j ||||d}| |S )ac  
        For decoder-only models:
        Process an input prompt into a
        [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance.

        Arguments:

        * prompt: input prompt
        * lora_request
        * return_mm_hashes

        Returns:

        * [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] instance
        rp   )r   r   r*   rL   rM   r1   r[   Zprompt_compsr-   r-   r.   _process_decoder_only_prompt  s   
z.InputPreprocessor._process_decoder_only_promptc                    s$   | j ||||dI dH }| |S )z
        Async version of
        [`_process_decoder_only_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_decoder_only_prompt].
        rp   N)r   r   r   r-   r-   r.   "_process_decoder_only_prompt_async/  s   
z4InputPreprocessor._process_decoder_only_prompt_asyncc                 C   sB   | j jr|r
J d| ||S t|rtd| j||||dS )zPreprocess the input prompt.z;Multimodal hashes for encoder-decoder models should not be z-returned until they are supported on vLLM V1.9Cannot pass encoder-decoder prompt to decoder-only modelsrp   )r#   r<   r   r    r/   r   r*   rL   rM   r1   r[   r-   r-   r.   
preprocessC  s   zInputPreprocessor.preprocessc                    sN   | j jr|rJ d| |I dH S t|rtd| j||||dI dH S )zo
        Async version of
        [`preprocess`][vllm.inputs.preprocess.InputPreprocessor.preprocess].
        r   Nr   rp   )r#   r<   r   r    r/   r   r   r-   r-   r.   preprocess_async`  s   z"InputPreprocessor.preprocess_asyncr'   )NNF)B__name__
__module____qualname__r   r   r   r   r   r)   r0   r
   intr8   r:   r?   listr@   rC   rF   rG   r   rK   rS   rT   r   rW   rX   r   r   r   rU   boolr   ra   rb   r   r   rj   rk   r   r   rs   rt   r   rx   ry   r   r   r   r   r   r   r   tupler   r   r   r   r   r   r   r   r   r   r   __classcell__r-   r-   r+   r.   r"      sB   


$

%






 


	

"

"

&

&
2
&
,


?
J
3

#

 r"   )/r   collections.abcr   typingr   r   r   r   Ztyping_extensionsr   Zvllm.configr   Zvllm.loggerr	   Zvllm.lora.requestr
   Zvllm.multimodalr   r   Zvllm.multimodal.inputsr   r   r   Z!vllm.transformers_utils.tokenizerr   Z'vllm.transformers_utils.tokenizer_groupr   datar   r   r   r   r   r   r   r   r   r   r   r   r   parser    r!   r   r3   r"   r-   r-   r-   r.   <module>   s   <