o
    0 i                     @   s`   d dl mZmZ ddlmZ d dlZ			ddejdee dee deee  d	ef
d
dZ	dS )    )ListOptional   )LLTokenizerNhf_tokenizern_vocab	eos_tokenslicesreturnc                 C   s<   t | tjr| j }|du r| j}t||||dS td)aN  
    Create a new tokenizer from a fast Hugging Face tokenizer.
    This is an expensive operation (~1s), so the result should be cached.
    It currently only supports fast tokenizers, which are then handled
    by the Rust tokenizers library.

    Args:
        hf_tokenizer: transformers.PreTrainedTokenizerFast - the tokenizer to wrap
        n_vocab: int - override the size of the vocabulary
        eos_token: int - override the EOS token
        slices: List[str] - configuration for slicer optimization; pass [] to disable,
            or None to use the default configuration
    N)r   r   r	   z"Only fast tokenizers are supported)
isinstancetransformersPreTrainedTokenizerFastZbackend_tokenizerZto_strZeos_token_idr   
ValueError)r   r   r   r	   s r   Y/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/llguidance/hf.pyfrom_tokenizer   s   
r   )NNN)
typingr   r   Z_libr   r   r   intstrr   r   r   r   r   <module>   s"    
