o
    Ê)i«  ã                	   @   sä   zd dl Z d dlZd dlmZ d dlmZ W n ey!   edƒ‚w d dlmZm	Z	m
Z
mZ d dlmZ d dlmZmZmZ d dlZG dd	„ d	ƒZd
eejef defdd„Z	ddeejeef dededefdd„ZddgZdS )é    N)ÚMistralTokenizer)ÚPreTrainedTokenizerBasez@vllm is not installed. Please install it with "pip install vllm")ÚCharacterLevelParserÚTokenEnforcerÚFormatEnforcerAnalyzerÚTokenEnforcerTokenizerData)Ú#build_token_enforcer_tokenizer_data)ÚListÚOptionalÚUnionc                   @   s8   e Zd Zdefdd„Zdee dejdejfdd„Z	d	S )
ÚVLLMLogitsProcessorÚtoken_enforcerc                 C   s"   || _ |r	t|ƒnd | _d | _d S )N)r   r   ÚanalyzerÚmask)Úselfr   Úanalyze© r   ún/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/lmformatenforcer/integrations/vllm.pyÚ__init__   s   
zVLLMLogitsProcessor.__init__Ú	input_idsÚscoresÚreturnc                 C   sn   |}| j r| j  || ¡ ¡ | j |¡}| jd ur"| j tj ¡ n	t	 
|tj ¡| _d| j|< || j }|S )Nr   )r   Zreport_raw_logitsÚtolistr   Zget_allowed_tokensr   Zfill_ÚmathÚinfÚtorchZ	full_like)r   r   r   Ztoken_sequenceZallowed_tokensr   r   r   Ú__call__   s   


zVLLMLogitsProcessor.__call__N)
Ú__name__Ú
__module__Ú__qualname__r   r   r	   Úintr   ZTensorr   r   r   r   r   r      s    "r   Ú	tokenizerr   c                 C   s\   d }t | dƒr| j ¡  ¡ }t | dƒr|  ¡ } t| tƒr!t| |ƒS t | dƒr)| j} t| |ƒS )NÚ
llm_engineÚget_tokenizerr!   )	Úhasattrr"   Zget_model_configZget_vocab_sizer#   Ú
isinstancer   r   r!   )r!   Z
vocab_sizer   r   r   Ú(build_vllm_token_enforcer_tokenizer_data#   s   





r&   FÚllmÚcharacter_level_parserr   c                 C   s&   t | tƒs	t| ƒ} t| |ƒ}t||ƒS )zåBuild the logits processor function that llama.cpp will use to filter the tokens generated by the model. The result
    can be passed in the logits_processor list that is sent to the call or generate() method of llama.cpp models.)r%   r   r&   r   r   )r'   r(   r   r   r   r   r   Úbuild_vllm_logits_processor1   s   


r)   )F)r   ZvllmZ!vllm.transformers_utils.tokenizerr   Ztransformersr   ÚImportErrorZlmformatenforcerr   r   r   r   Z*lmformatenforcer.integrations.transformersr   Útypingr	   r
   r   r   r   ZLLMr&   Úboolr)   Ú__all__r   r   r   r   Ú<module>   s.    ÿþÿþ
þ