o
    )i,                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZ d dlZd dlmZmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ erud dl m!Z! d dl"m#Z# d dl$m%Z% neZ!eZ#eZ%ee&Z'eeee%f Z(ddde(de)e* dee+ de,fddZ-ddddde(de,dee+ dee* dee+ de)e* fddZ.de(de(fddZ/deddfd d!Z0d"d#ddd$d%ee,ef d&e,d'e+d(ee, d)ee, de(fd*d+Z1ee1Z2d,e!d-efd.d/Z3d0e#dee( fd1d2Z4ee4Z5dS )3    N)	lru_cache)Path)
MethodType)TYPE_CHECKINGAnyOptionalUnion)AutoTokenizerPreTrainedTokenizerPreTrainedTokenizerFast)envs)init_logger))get_sentence_transformer_tokenizer_config)MistralTokenizer)check_gguf_file)
make_async)ModelConfig)LoRARequest)TokenizerBaseskip_special_tokens	tokenizer	token_idsr   returnc                C   s*   t | d| j}|dur|||dS ||S )z
    Backend-agnostic equivalent of HF's
    `tokenizer.decode(token_ids, ...)`.

    `skip_special_tokens=None` means to use the backend's default
    settings.
    _decodeNr   )getattrdecode)r   r   r   Zdecode_method r   m/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/transformers_utils/tokenizer.pydecode_tokens(   s   r   )
truncation
max_lengthadd_special_tokenstextr    r!   r"   c                C   sF   i }|dur
||d< |dur||d< |dur||d< | j |fi |S )z
    Backend-agnostic equivalent of HF's
    `tokenizer.encode(text, ...)`.

    `add_special_tokens=None` means to use the backend's default
    settings.
    Nr!   r    r"   )encode)r   r#   r    r!   r"   Zkw_argsr   r   r   encode_tokens=   s   r%   c                    s   t  }jjj tt  tdr<t	
t t j W d   n1 s7w   Y  G  fdddj}djj |_||_|S )z
    By default, transformers will recompute multiple tokenizer properties
    each time they are called, leading to a significant slowdown.
    This proxy caches these properties for faster access.
    
vocab_sizeNc                       s   e Zd Zedee ffddZedee ffddZedee ffddZ	edef fdd	Z
deeef ffd
dZdeffddZfddZdS )z-get_cached_tokenizer.<locals>.CachedTokenizerr   c                        S Nr   self)tokenizer_all_special_idsr   r   all_special_idst      z=get_cached_tokenizer.<locals>.CachedTokenizer.all_special_idsc                    r'   r(   r   r)   )tokenizer_all_special_tokensr   r   all_special_tokensx   r-   z@get_cached_tokenizer.<locals>.CachedTokenizer.all_special_tokensc                    r'   r(   r   r)   )%tokenizer_all_special_tokens_extendedr   r   all_special_tokens_extended|   r-   zIget_cached_tokenizer.<locals>.CachedTokenizer.all_special_tokens_extendedc                    r'   r(   r   r)   )max_token_idr   r   r2      r-   z:get_cached_tokenizer.<locals>.CachedTokenizer.max_token_idc                    r'   r(   r   r)   )tokenizer_vocabr   r   	get_vocab      z7get_cached_tokenizer.<locals>.CachedTokenizer.get_vocabc                    r'   r(   r   r)   )tokenizer_lenr   r   __len__   r5   z5get_cached_tokenizer.<locals>.CachedTokenizer.__len__c                    s
   t  ffS r(   )get_cached_tokenizerr)   )r   r   r   
__reduce__   s   
z8get_cached_tokenizer.<locals>.CachedTokenizer.__reduce__N)__name__
__module____qualname__propertylistintr,   strr/   r1   r2   dictr4   r7   r9   r   r2   r   r+   r.   r0   r6   r3   r   r   CachedTokenizerr   s    rC   ZCached)copyr,   r/   r1   r4   lenmaxvalueshasattr
contextlibsuppressNotImplementedErrorr&   	__class__r:   )r   Zcached_tokenizerrC   r   rB   r   r8   Z   s    

"r8   c                    s8   j  dddtdtt f fdd}t|_ dS )z@Patch _pad method to accept `padding_side` for older tokenizers.N)padding_sider*   rM   c                   s@   |d ur|| j krdtj d}tj|dd  |i |S )Nz,`padding_side` argument is not supported by z and will be ignored.   
stacklevel)rM   typer:   warningswarn)r*   rM   argskwargsmsgZorig_padr   r   r   _pad   s   z patch_padding_side.<locals>._pad)rX   r
   r   r@   r   )r   rX   r   rW   r   patch_padding_side   s   rY   autoF)tokenizer_modetrust_remote_coderevisiondownload_dirtokenizer_namer[   r\   r]   r^   c             
   O   sF  t jr9ddlm} ddlm} tj| s9|| | || ||t	j
jg dd}	|	} W d   n1 s4w   Y  |dkrK|dd	rGtd
d	|d< d|vrSd|d< t| }
|
ret| j|d< t| j} t| dd dk}|r~|dkr~tjdtdd |dkrtjt| |d}|S |dkrddlm} |jt| g|R ||d|}|S ztj| g|R ||d|}W n# ty } z|sdt|v sdt|v rd}t|||d}~ww t| |}t|t r|dd	rdd |j!" D }|#| t$|j%dv rt|t&sJ t'| t|t(st)*d  t+|}|S )!zMGets a tokenizer for the given model name via HuggingFace or ModelScope.
    r   )snapshot_download)get_lock)z.*.ptz.*.safetensorsz.*.bin)Zmodel_id	cache_dirr]   Zlocal_files_onlyZignore_file_patternNZslowZuse_fastFz5Cannot use the fast tokenizer in slow tokenizer mode.Ztruncation_sideleftZ	gguf_file/Z	mistralaiZmistralz{It is strongly recommended to run mistral models with `--tokenizer-mode "mistral"` to ensure correct encoding and decoding.rN   rO   )r]   Zcustom)TokenizerRegistry)r]   r^   )r\   r]   z,does not exist or is not currently imported.z*requires you to execute the tokenizer filezFailed to load the tokenizer. If the tokenizer is a custom tokenizer not yet available in the HuggingFace transformers library, consider setting `trust_remote_code=True` in LLM or using the `--trust-remote-code` flag in the CLI.Zdo_lower_casec                 S   s   i | ]	\}}||  qS r   )lower).0kvr   r   r   
<dictcomp>  s    z!get_tokenizer.<locals>.<dictcomp>)ZChatGLMTokenizerZChatGLM4TokenizerziUsing a slow tokenizer. This might cause a significant slowdown. Consider using a fast tokenizer instead.),r   ZVLLM_USE_MODELSCOPEZ modelscope.hub.snapshot_downloadr`   Z-vllm.model_executor.model_loader.weight_utilsra   ospathexistshuggingface_hub	constantsZHF_HUB_OFFLINEget
ValueErrorr   r   nameparentr@   splitrR   rS   FutureWarningr   Zfrom_pretrained&vllm.transformers_utils.tokenizer_basere   get_tokenizerr	   RuntimeErrorr   
isinstancerA   special_tokens_mapitemsr"   rQ   r:   r
   rY   r   loggerwarningr8   )r_   r[   r\   r]   r^   rT   rU   r`   ra   Ztokenizer_pathZis_ggufZis_from_mistral_orgr   re   eerr_msgZencoder_configrz   r   r   r   rw      s   


:6


rw   model_configrU   c                 K   s    t | jf| j| j| jd|S )N)r[   r]   r\   )cached_get_tokenizerr   r[   Ztokenizer_revisionr\   )r   rU   r   r   r   cached_tokenizer_from_config$  s   r   lora_requestc              
   O   sf   | d u rd S zt | jg|R i |}W |S  ty2 } ztd| j| d }W Y d }~|S d }~ww )NzMNo tokenizer found in %s, using base model tokenizer instead. (Exception: %s))rw   Z	lora_path	Exceptionr|   r}   )r   rT   rU   r   r~   r   r   r   get_lora_tokenizer1  s   r   )6rI   rD   rk   rR   	functoolsr   pathlibr   typesr   typingr   r   r   r   rn   Ztransformersr	   r
   r   Zvllmr   Zvllm.loggerr   Zvllm.transformers_utils.configr   Z"vllm.transformers_utils.tokenizersr   Zvllm.transformers_utils.utilsr   Z
vllm.utilsr   Zvllm.configr   Zvllm.lora.requestr   rv   r   r:   r|   ZAnyTokenizerr>   r?   boolr@   r   r%   r8   rY   rw   r   r   r   Zget_lora_tokenizer_asyncr   r   r   r   <module>   s   

9

z

