o
    )i-                     @   sN  d Z ddlZddlmZmZmZmZmZmZm	Z	 ddl
Z
ddlmZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZmZ dd
lmZ dddddde	eee eeef f dedee deeeef  dedefddZdAdededefddZdBdededefddZde	eef defdd Z d!d!dd"de	eef d#ed$ed%ed&edefd'd(Z!	dCd)e
j"d*ed+edee fd,d-Z#	dCd)e
j"d*ed+edeeef fd.d/Z$d0e
j"de
j"fd1d2Z%	dDde	eef d3ee defd4d5Z&d6edee fd7d8Z'dEd9ee d:ee defd;d<Z(dEd9ee) d:ee) defd=d>Z*G d?d@ d@Z+dS )FzTesting utilities.

The APIs in this module are used for testing and debugging and are prone to
change. Don't use them in production.    N)AnyDictListOptionalTupleTypeUnion)	BaseModel   )_core)CompiledGrammarGrammarCompiler)Grammar_convert_schema_to_str)GrammarMatcherbitmask_dtype)TokenizerInfoT)any_whitespaceindent
separatorsstrict_modeschemar   r   r   r   returnc                C   s   t | }tj|||||S )a\  Convert JSON schema string to BNF grammar string. For test purposes.

    Parameters
    ----------
    schema : Union[str, Type[BaseModel], Dict[str, Any]]
        The schema string or Pydantic model or JSON schema dict.

    indent : Optional[int], default: None
        The number of spaces for indentation. If None, the output will be in one line.

    separators : Optional[Tuple[str, str]], default: None
        Two separators used in the schema: comma and colon. Examples: (",", ":"), (", ", ": ").
        If None, the default separators will be used: (",", ": ") when the indent is not None,
        and (", ", ": ") otherwise.

    strict_mode : bool, default: True
        Whether to use strict mode. In strict mode, the generated grammar will not allow
        properties and items that is not specified in the schema. This is equivalent to
        setting unevaluatedProperties and unevaluatedItems to false.

        This helps LLM to generate accurate output in the grammar-guided generation with JSON
        schema.

    Returns
    -------
    bnf_string : str
        The BNF grammar string.
    )r   r   testing_json_schema_to_ebnf)r   r   r   r   r   Z
schema_str r   \/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/xgrammar/testing.pyr      s   $
r   regexwith_rule_namec                 C      t j| |S )as  Convert a regex string to BNF grammar string. For test purposes. The regex grammar
    follows the syntax in JavaScript (ECMA 262). Check
    https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Regular_expressions
    for a tutorial. Currently the following features are not supported:
    1. Backreference (\1)
    2. non-capturing group, naming capture groups and assertions ((?...))
    3. Unicode character class escape (\p{...})
    4. Word boundary (\b)
    5. Unicode property escapes (\p{...})
    6. Quantifier with range {x,y}. Now user can just repeat the element as a workaround.

    This method is primarily intended for testing and debugging purposes.

    Parameters
    ----------
    regex : str
        The regex string to be converted.

    Returns
    -------
    bnf_string : str
        The BNF grammar string converted from the input regex.
    )r   r   _regex_to_ebnf)r   r   r   r   r   r    =   s   r    rootebnf_stringroot_rule_namec                 C   s   t tj| |S )a  Convert a BNF grammar string to a Grammar object without normalization. For test
    purposes. The result grammar cannot be compiled / used in GrammarMatcher.

    Parameters
    ----------
    ebnf_string : str
        The BNF grammar string to be converted.

    Returns
    -------
    grammar : Grammar
        The unnormalized Grammar object converted from the input BNF grammar string.
    )r   _create_from_handler   r   !_ebnf_to_grammar_no_normalization)r"   r#   r   r   r   r%   X   s   r%   grammarc                 K   s2   t g }t|dd}|| }t|fddi|S )a  Create a GrammarMatcher from a grammar. The tokenizer info will be set to an empty
    TokenizerInfo. The result matcher can only accept strings, and cannot accept tokens.

    Parameters
    ----------
    grammar : Union[Grammar, str]
        The grammar to create the matcher from. Can be either a Grammar object or a string
        containing EBNF grammar.

    Returns
    -------
    matcher : GrammarMatcher
        The created grammar matcher.
    FZcache_enabledZterminate_without_stop_tokenTr   r   Zcompile_grammarr   )r&   kwargstokenizer_infogrammar_compilercompiled_grammarr   r   r   _get_matcher_from_grammark   s   
r-   F)debug_print
print_timerequire_termination	input_strr.   r/   r0   c          	      C   sj   t | }|r
t }|j||d}|r)t }td| d| d|| d  d |s-dS |s1dS | S )	a#  Check if a grammar accepts a string. For test purposes.

    Parameters
    ----------
    grammar : Union[Grammar, str]
        The grammar to check. Can be either a Grammar object or a BNF grammar string.
    input_str : str
        The input string to check.
    debug_print : bool, default: False
        Whether to print debug information during matching.
    print_time : bool, default: False
        Whether to print timing information.

    Returns
    -------
    bool
        True if the grammar accepts the string, False otherwise.
    )r.   z
Accepting z
, result: z, time: g     @@z usFT)r-   timemonotonic_nsZaccept_stringprintZis_terminated)	r&   r1   r.   r/   r0   Zgrammar_matcherstartacceptedendr   r   r   _is_grammar_accept_string   s   $r8   bitmask
vocab_sizeindexc                 C   sJ   | j jdkr
td| jtkrtdt dtj|  t	| j
||S )a  Get the ids of the rejected tokens from the bitmask. Mainly for debug purposes.

    Parameters
    ----------
    bitmask : torch.Tensor
        The rejected token bitmask. Should be generated by allocate_token_bitmask and
        filled by fill_next_token_bitmask. Should be on CPU.

    index : int, default: 0
        The batch index of the bitmask. For batch inference, bitmask[index] will be used.
        Otherwise is ignored.

    Returns
    -------
    rejected_token_ids : List[int]
        A list of rejected token ids.
    cpuzbitmask should be on CPU.zbitmask should be of type .)devicetype
ValueErrordtyper   r   r   _get_masked_tokens_from_bitmaskdata_ptrlistshaper9   r:   r;   r   r   r   rB      s   
rB   c                 C   s   t j|  t| j||S )a  Check if the bitmask is a single token bitmask.

    Parameters
    ----------
    bitmask : torch.Tensor
        The bitmask to check. Should be on CPU.
    vocab_size : int
        The size of the vocabulary.
    index : int, default: 0
        The index of the bitmask.

    Returns
    -------
    is_single_token : bool
        True if the bitmask is a single token bitmask, False otherwise.
    token_id : int
        The id of the token if the bitmask is a single token bitmask, -1 otherwise.
    )r   r   _is_single_token_bitmaskrC   rD   rE   rF   r   r   r   rG      s   rG   	bool_maskc                 C   s   |  tj}d| jd d  d }|dkr!tjjj|d|fdd}|| jd dd}tjdd t	dD | j
tjd tj}|| jd	d
}| tjS )a  Get the bitmask from bool mask. If the bool mask does not align with the 32-bit block
    size, it will add extra 1 paddings.

    Parameters
    ----------
    bool_mask : torch.Tensor
        The rejected token bool mask. For each element value, True means the token is allowed,
        while False means the token is rejected.

    Returns
    -------
    bitmask : torch.Tensor
        The rejected token bitmask.
        r
   r   )valuec                 S   s   g | ]}d |> qS )r
   r   ).0ir   r   r   
<listcomp>   s    z)_bool_mask_to_bitmask.<locals>.<listcomp>)r>   rA      )dim)totorchZint32rE   nnZ
functionalpadviewZtensorranger>   Zint64sum)rH   Zbool_mask_int32Zpad_sizeZbool_mask_viewweightsr9   r   r   r   _bool_mask_to_bitmask   s   rY   r*   c                 K   s6   |du rt g }t|dd}|| }t|fi |S )ak  Create a GrammarMatcher from a grammar and tokenizer info.

    Parameters
    ----------
    grammar : Union[Grammar, str]
        The grammar to create the matcher from. Can be either a Grammar object or a string
        containing EBNF grammar.
    tokenizer_info : Optional[TokenizerInfo], default: None
        Information about the tokenizer to use with this grammar. If None, an empty
        TokenizerInfo will be created.
    **kwargs
        Additional keyword arguments to pass to the GrammarMatcher constructor.

    Returns
    -------
    matcher : GrammarMatcher
        The created grammar matcher.
    NFr'   r(   )r&   r*   r)   r+   r,   r   r   r   ,_get_matcher_from_grammar_and_tokenizer_info  s
   
rZ   r,   c                 C   s   t j| jS N)r   r   _get_allow_empty_rule_ids_handle)r,   r   r   r   r\        r\   r5   r7   c                 C   r   r[   )r   r   _generate_range_regexr5   r7   r   r   r   r_   "  r^   r_   c                 C   r   r[   )r   r   _generate_float_regexr`   r   r   r   ra   &  r^   ra   c                   @   s~   e Zd ZdZededefddZededefddZededefdd	Zededefd
dZ	ededefddZ
dS )GrammarFunctorzrA utility class for transforming grammars. These methods are called during grammar parsing.
    For test purposes.r&   r   c                 C      t tjj| jS )z'Normalize the structure of the grammar.)r   r$   r   r   grammar_functorstructure_normalizerr]   r&   r   r   r   re   .     z#GrammarFunctor.structure_normalizerc                 C   rc   )z+Inline some rule references in the grammar.)r   r$   r   r   rd   rule_inlinerr]   rf   r   r   r   rh   5  rg   zGrammarFunctor.rule_inlinerc                 C   rc   )z-Fuse the byte string elements in the grammar.)r   r$   r   r   rd   byte_string_fuserr]   rf   r   r   r   ri   <  rg   z GrammarFunctor.byte_string_fuserc                 C   rc   )z2Eliminate the not referenced rules in the grammar.)r   r$   r   r   rd   dead_code_eliminatorr]   rf   r   r   r   rj   C  rg   z#GrammarFunctor.dead_code_eliminatorc                 C   rc   )z4Analyze and add lookahead assertions in the grammar.)r   r$   r   r   rd   lookahead_assertion_analyzerr]   rf   r   r   r   rk   J  rg   z+GrammarFunctor.lookahead_assertion_analyzerN)__name__
__module____qualname____doc__staticmethodr   re   rh   ri   rj   rk   r   r   r   r   rb   *  s    rb   )T)r!   )r   r[   )NN),ro   r2   typingr   r   r   r   r   r   r   rR   Zpydanticr	   baser   compilerr   r   r&   r   r   Zmatcherr   r   r*   r   strboolintr   r    r%   r-   r8   ZTensorrB   rG   rY   rZ   r\   r_   floatra   rb   r   r   r   r   <module>   s    $
*

/




  