o
    )iX                     @   s   d dl mZmZ d dlZd dlmZ eeee ejgejf eee ee ejgejf f Z		 dee
 dedee	 fddZG d	d
 d
ZdS )    )CallableUnionN)AnyTokenizer	bad_words	tokenizerreturnc                 C   s   t  }| D ]7}dD ]2}|rdnd}||  }|j|dd}|r6|r;|d |d d kr;t|t|d kr;|| q	qt|dgS )	N)FT  F)textZadd_special_tokensr   )bad_words_ids)listlstripencodelenappendNoBadWordsLogitsProcessor)r   r   r   Zbad_wordZadd_prefix_spaceprefixpromptZprompt_token_ids r   _/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/logits_process.pyget_bad_words_logits_processors   s"   
r   c                   @   s~   e Zd ZedZdZdeee  fddZde	ee e
e f dejdejfd	d
ZdejddfddZdeddfddZdS )r   z-infg        r   c                 C   s   || _ d | _d S )N)r   	word_bias)selfr   r   r   r   __init__4   s   
z"NoBadWordsLogitsProcessor.__init__past_tokens_idslogitsr   c           
      C   s   | j d u r| j|d t|}| jD ]K}t|dkrqt|t|d kr'qt|d }|d }|| d  }|d | }t|t|ksHJ t|t|k}	||  |	rY| jn| j7  < q|| j  | }|S )N)r      r   )	r   _init_word_biastorchZ
zeros_liker   r   tuple_SMALLEST_LOGIT_NEUTRAL_LOGIT)
r   r   r   Zlast_token_biasbad_word_idsZprefix_lengthZlast_token_idZactual_prefixZexpected_prefixZis_matchr   r   r   __call__8   s&   


z"NoBadWordsLogitsProcessor.__call__Nc                 C   s^   |j d }| j|d tj|ftj|jd| _| jD ]}t|dkr,|d }| j	| j|< qd S )Nr   )
vocab_size)Zdtypedevicer   )
shape_check_token_ids_boundsr   Zzerosfloatr&   r   r   r   r!   )r   r   r%   r#   Zbad_word_idr   r   r   r   X   s   

z)NoBadWordsLogitsProcessor._init_word_biasr%   c                 C   s`   g }| j D ]}|D ]}|dk s||kr|| q	qt|dkr.td| d| d| dd S )Nr   zThe model vocabulary size is z2, but the following tokens were specified as bad: zE. All token id values should be integers satisfying: 0 <= token_id < .)r   r   r   
ValueError)r   r%   Zinvalid_token_idsr#   Ztoken_idr   r   r   r(   i   s    

z1NoBadWordsLogitsProcessor._check_token_ids_bounds)__name__
__module____qualname__r)   r!   r"   r   intr   r   r    r   ZFloatTensorTensorr$   r   r(   r   r   r   r   r   0   s    
 r   )typingr   r   r   Z!vllm.transformers_utils.tokenizerr   r   r/   r0   ZLogitsProcessorstrr   r   r   r   r   r   <module>   s"   
