o
    )i                  	   @   s   d dl Z d dlmZmZmZmZ d dlZd dlmZ d dl	m
Z
mZ d dlmZmZ G dd dZdeeeeef  fd	d
ZdedefddZ	ddeeef de
dedefddZdS )    N)ListOptionalTupleUnion)PreTrainedTokenizerBase)CharacterLevelParserFormatEnforcerAnalyzer)TokenEnforcerTokenEnforcerTokenizerDatac                   @   sH   e Zd ZdefddZdd Zdedeee  dej	d	ej	fd
dZ
dS )TRTLLMLogitsProcessortoken_enforcerc                 C   s2   || _ |r	t|nd | _d | _tj | _|| _d S )N)r   r   analyzermaskmathinfmask_valeos_token_id)selfr   r   analyze r   p/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/lmformatenforcer/integrations/trtllm.py__init__
   s
   

zTRTLLMLogitsProcessor.__init__c                    s    fdd|  D S )Nc                    s.   g | ]}|t  jtr jn jgvr|qS r   )
isinstancer   list).0xr   r   r   
<listcomp>   s   
 z/TRTLLMLogitsProcessor._trim.<locals>.<listcomp>)tolist)r   inputr   r   r   _trim   s   zTRTLLMLogitsProcessor._trimstepbatch_input_idslogitsreturnc                 C   s   t t|D ]B}| jr| j|| ||   | j| || }| jd ur0| j	| j
 n
t|| | j
| _d| j|< || | j ||< q|S )Nr   )rangelenr   Zreport_raw_logitsr   r   Zget_allowed_tokensr    r   Zfill_r   torchZ	full_like)r   r!   r"   r#   idxZallowed_tokensr   r   r   __call__   s   

zTRTLLMLogitsProcessor.__call__N)__name__
__module____qualname__r	   r   r    intr   r'   ZTensorr)   r   r   r   r   r   	   s    *r   r$   c           	      C   s   t | dr	|  } t | dr| j} | dd g}g }| j}t|D ]1}|| jv r*q"tj||g tj	d}| 
|dd  }| 
|}t|t|k}||||f q"|S )Nget_tokenizer	tokenizer0Zdtype   )hasattrr.   r/   encode
vocab_sizer%   Zall_special_idsr'   tensorlongdecoder&   append)	r/   Ztoken_0regular_tokensr6   Z	token_idxZtensor_after_0Zdecoded_after_0Zdecoded_regularZis_word_start_tokenr   r   r   _build_regular_tokens_list'   s    



r<   r/   c                    s4   t  }dtt dtf fdd}t|| j}|S )z\Build the TokenEnforcerTokenizerData from a tokenizer in order to cache it between instancestokensr$   c                    s   t j| t jd} |S )Nr2   )r'   r7   r8   r9   )r=   r7   r/   r   r   _decode@   s   
z,build_trtlmm_tokenizer_data.<locals>._decode)r<   r   r-   strr
   r   )r/   r;   r?   tokenizer_datar   r>   r   build_trtlmm_tokenizer_data<   s   rB   Fcharacter_level_parserr   c                 C   s0   t | tr| }nt| }t||}t|| j|S )zf
    Build logits processor for feeding it into generate function (use_py_session should be True)
    )r   r
   rB   r	   r   r   )r/   rC   r   rA   r   r   r   r   build_trtllm_logits_processorH   s
   

rD   )F)r   typingr   r   r   r   r'   Ztransformersr   Zlmformatenforcerr   r   Zlmformatenforcer.tokenenforcerr	   r
   r   r-   r@   boolr<   rB   rD   r   r   r   r   <module>   s"    