o
    )iZ!                     @   s   d Z ddlmZmZmZmZmZmZmZm	Z	 ddl
mZ ddlmZmZ ddlmZmZmZ ddlmZ G dd	 d	eZG d
d deZdS )z6Compiling grammar for efficient token mask generation.    )AnyDictListOptionalTupleTypeUnionoverload)	BaseModel   )	XGRObject_core)GrammarStructuralTagItem_convert_schema_to_str)TokenizerInfoc                   @   sn   e Zd ZdZedefddZedefddZede	fddZ
defd	d
Zedededd fddZdS )CompiledGrammara#  This is the primary object to store compiled grammar.

    A CompiledGrammar can be used to construct GrammarMatcher
    to generate token masks efficiently.

    Notes
    -----
    Do not construct this class directly, instead
    use :class:`GrammarCompiler` to construct the object.
    returnc                 C      t | jjS )zThe original grammar.)r   _create_from_handle_handlegrammarself r   ]/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/xgrammar/compiler.pyr         zCompiledGrammar.grammarc                 C   r   )z8The tokenizer info associated with the compiled grammar.)r   r   r   tokenizer_infor   r   r   r   r      r   zCompiledGrammar.tokenizer_infoc                 C      | j jS )z>The approximate memory usage of the compiled grammar in bytes.)r   memory_size_bytesr   r   r   r   r   "   s   z!CompiledGrammar.memory_size_bytesc                 C   
   | j  S )z0Serialize the compiled grammar to a JSON string.)r   serialize_jsonr   r   r   r   r!   '      
zCompiledGrammar.serialize_jsonjson_strr   c                 C   s   t tj | |jS )zGDeserialize the compiled grammar from a JSON string and tokenizer info.)r   r   r   deserialize_jsonr   )r#   r   r   r   r   r$   +   s   z CompiledGrammar.deserialize_jsonN)__name__
__module____qualname____doc__propertyr   r   r   r   intr   strr!   staticmethodr$   r   r   r   r   r      s    r   c                   @   sJ  e Zd ZdZdddddededed	efd
dZddddddee	e
e ee	ef f dedee deee	e	f  dedefddZdefddZde	defddZdee dee	 defddZedd d!e	d"e	defd#d$Zed%edefd&d$Zdd d%ee	ef d"e	defd'd$Zd.d(d)Zdefd*d+Zedefd,d-ZdS )/GrammarCompilera:  The compiler for grammars. It is associated with a certain tokenizer info, and compiles
    grammars into CompiledGrammar with the tokenizer info. It allows parallel compilation with
    multiple threads, and has a cache to store the compilation result, avoiding compiling the
    same grammar multiple times.
       T)max_threadscache_enabledcache_limit_bytesr   r0   r1   r2   c                C   s.   t |ts	td| t|j||| dS )a  Construct the compiler.

        Parameters
        ----------
        tokenizer_info : TokenizerInfo
            The tokenizer info.

        max_threads : int, default: 8
            The maximum number of threads used to compile the grammar.

        cache_enabled : bool, default: True
            Whether to enable the cache.

        cache_limit_bytes : int, default: -1
            The maximum memory usage for the cache in the specified unit.
            Note that the actual memory usage may slightly exceed this value.
        zSPlease convert the tokenizer to TokenizerInfo before passing it to GrammarCompiler.N)
isinstancer   
ValueErrorZ_init_handler   r-   r   )r   r   r0   r1   r2   r   r   r   __init__:   s   

zGrammarCompiler.__init__N)any_whitespaceindent
separatorsstrict_modeschemar6   r7   r8   r9   r   c             	   C   s"   t |}t| j|||||S )a  Get CompiledGrammar from the specified JSON schema and format. The indent
        and separators parameters follow the same convention as in json.dumps().

        Parameters
        ----------
        schema : Union[str, Type[BaseModel], Dict[str, Any]]
            The schema string or Pydantic model or JSON schema dict.

        indent : Optional[int], default: None
            The number of spaces for indentation. If None, the output will be in one line.

        separators : Optional[Tuple[str, str]], default: None
            Two separators used in the schema: comma and colon. Examples: (",", ":"), (", ", ": ").
            If None, the default separators will be used: (",", ": ") when the indent is not None,
            and (", ", ": ") otherwise.

        strict_mode : bool, default: True
            Whether to use strict mode. In strict mode, the generated grammar will not allow
            properties and items that is not specified in the schema. This is equivalent to
            setting unevaluatedProperties and unevaluatedItems to false.

            This helps LLM to generate accurate output in the grammar-guided generation with JSON
            schema.

        Returns
        -------
        compiled_grammar : CompiledGrammar
            The compiled grammar.
        )r   r   r   r   compile_json_schema)r   r:   r6   r7   r8   r9   Z
schema_strr   r   r   r;   _   s   &
z#GrammarCompiler.compile_json_schemac                 C   s   t | j S )zGet CompiledGrammar from the standard JSON.

        Returns
        -------
        compiled_grammar : CompiledGrammar
            The compiled grammar.
        )r   r   r   compile_builtin_json_grammarr   r   r   r   r<      s   z,GrammarCompiler.compile_builtin_json_grammarregexc                 C   s   t | j|S )zGet CompiledGrammar from the specified regex.

        Parameters
        ----------
        regex : str
            The regex string.

        Returns
        -------
        compiled_grammar : CompiledGrammar
            The compiled grammar.
        )r   r   r   compile_regex)r   r=   r   r   r   r>      s   zGrammarCompiler.compile_regextagstriggersc                 C   s"   dd |D }t | j||S )a  Compile a grammar from structural tags. See Grammar.from_structural_tag() for more
        details.

        Parameters
        ----------
        tags : List[StructuralTagItem]
            The structural tags.

        triggers : List[str]
            The triggers.

        Returns
        -------
        compiled_grammar : CompiledGrammar
            The compiled grammar.
        c                 S   s    g | ]}|j t|j|jfqS r   )beginr   Zschema_end).0tagr   r   r   
<listcomp>   s     z:GrammarCompiler.compile_structural_tag.<locals>.<listcomp>)r   r   r   compile_structural_tag)r   r?   r@   Z
tags_tupler   r   r   rF      s   z&GrammarCompiler.compile_structural_tagrootroot_rule_nameebnf_stringrI   c                C      d S Nr   )r   rJ   rI   r   r   r   compile_grammar   s   zGrammarCompiler.compile_grammarr   c                 C   rK   rL   r   )r   r   r   r   r   rM      s   c                C   s,   t |trtj||d}t| j|jS )am  Compile a grammar object.

        Overloads:

        1. ``compile_grammar(ebnf_string: str, *, root_rule_name: str = "root") -> CompiledGrammar``
            - Compile a grammar from an EBNF string. The string should follow the format described
              in https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md.

        2. ``compile_grammar(grammar: Grammar) -> CompiledGrammar``
            - Compile a grammar from a Grammar object.

        Parameters
        ----------
        ebnf_string : str
            The grammar string in EBNF format.
        root_rule_name : str, default: "root"
            The name of the root rule in the grammar.
        grammar : Union[str, Grammar]
            The grammar string or Grammar object.

        Returns
        -------
        compiled_grammar : CompiledGrammar
            The compiled grammar.
        rH   )r3   r+   r   Z	from_ebnfr   r   r   rM   )r   r   rI   r   r   r   rM      s   
c                 C   s   | j   dS )z#Clear all cached compiled grammars.N)r   clear_cacher   r   r   r   rN      s   zGrammarCompiler.clear_cachec                 C   r    )z3The approximate memory usage of the cache in bytes.)r   get_cache_size_bytesr   r   r   r   rO      r"   z$GrammarCompiler.get_cache_size_bytesc                 C   r   )zw
        The maximum memory usage for the cache in bytes.
        Returns -1 if the cache has no memory limit.
        )r   r2   r   r   r   r   r2      s   z!GrammarCompiler.cache_limit_bytes)r   N)r%   r&   r'   r(   r   r*   boolr5   r   r+   r   r
   r   r   r   r   r   r;   r<   r>   r   r   rF   r	   rM   r   rN   rO   r)   r2   r   r   r   r   r-   3   s~    

)
-




 r-   N)r(   typingr   r   r   r   r   r   r   r	   Zpydanticr
   baser   r   r   r   r   r   r   r   r   r-   r   r   r   r   <module>   s    ('