o
    )i                     @   sb   d dl mZ d dlmZmZmZmZmZ ddlm	Z	m
Z
 ddlmZ ddlmZ G dd dZd	S )
    )Optional)VLLM_INVALID_TOKEN_IDLogprobSamplingParamsSequenceSequenceGroup   )convert_prompt_ids_to_tokensdetokenize_incrementally)AnyTokenizer)TokenizerGroupc                   @   sp   e Zd ZdZdefddZdedefddZd	e	d
e
eeeef   deddfddZdededefddZdS )Detokenizerz;Provides methods to decode the output of a model into text.tokenizer_groupc                 C   s
   || _ d S )N)r   )selfr    r   o/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/transformers_utils/detokenizer.py__init__   s   
zDetokenizer.__init__sequencereturnc                 C   s   | j |jS )z5Returns the HF tokenizer to use for a given sequence.)r   Zget_lora_tokenizerZlora_request)r   r   r   r   r   get_tokenizer_for_seq   s   z!Detokenizer.get_tokenizer_for_seq	seq_groupprompt_logprobsposition_offsetNc                 C   s  |j }|dus	J | d }| }|dd }| |}d}	d}
d}d}g }d}t|D ]X\}}|| }|s9q.| D ]6\}}|jdu rs|tkrs|d| |g }t||||	|
|j	|j
d\}}}}||_||| krs|}|}|}q=|}	|}
|du r| }q.|| q.dS )a  Decodes the logprobs for the prompt of a sequence group.

        Args:
            seq_group: The sequence group to decode.
            prompt_logprobs: The logprobs to decode.
            position_offset: Offset of the first index of the logprobs 
                relative to the start of the sequence (for chunked prefill).
        
        Returns:
            The prompt logprobs with the decoded tokens.
        Nr   	tokenizerall_input_idsprev_tokensprefix_offsetread_offsetskip_special_tokensspaces_between_special_tokens)Zsampling_paramsZget_seqsget_token_idsr   	enumerateitemsdecoded_tokenr   r
   r    r!   copyextend)r   r   r   r   prmsseqZall_token_idsZprompt_token_idsr   r   r   Znext_iter_prefix_offsetZnext_iter_read_offsetZnext_iter_tokensr   Ztoken_position_in_logprobZprompt_logprobs_for_tokenZtoken_positiontoken_idsample_logprobZprompt_token_ids_with_token
new_tokensnew_textZnew_prefix_offsetZnew_read_offsetr   r   r   decode_prompt_logprobs_inplace   s^   


z*Detokenizer.decode_prompt_logprobs_inplacer)   r(   c              
   C   s$  |  }|d }| |}|jdu r$t||dd |jd\|_|_|_t|||j|j|j|j|jd\}}}}	|j	d }
|
r{|dd }|

 D ]1\}}||krU||_qI|jdu rz|tkrz||g }t|||j|j|j|j|jd\}}}}||_qI|j| ||_|	|_| j|7  _t|S )a  Decodes the new token for a sequence. In-place operation.

        Args:
            seq: The sequence to decode.
            prms: The sampling parameters used to generate the sequence.

        Returns:
            The number of characters added to the output text.
        r   N)r   Z
prompt_idsr    r   )r"   r   tokensr	   r    r   r   r
   r!   Zoutput_logprobsr$   r%   r   r'   Zoutput_textlen)r   r)   r(   r   Z!token_id_generated_this_iterationr   r,   Znew_decoded_token_textr   r   ZlogprobsZprevious_tokensr*   r+   Zall_input_ids_with_logprob_r-   r   r   r   decode_sequence_inplaced   sb   







z#Detokenizer.decode_sequence_inplace)__name__
__module____qualname____doc__r   r   r   r   r   r   listr   dictintr   r.   r   r2   r   r   r   r   r      s$    
Kr   N)typingr   Zvllm.sequencer   r   r   r   r   Zdetokenizer_utilsr	   r
   r   r   r   r   r   r   r   r   r   <module>   s   