import dataclasses
import inspect
from typing import TYPE_CHECKING, Iterator, List, Optional, Tuple, Union

from outlines.generate.api import GenerationParameters, SamplingParameters
from outlines.models.tokenizer import Tokenizer

if TYPE_CHECKING:
    import torch
    from transformers import PreTrainedModel, PreTrainedTokenizer

    from outlines.processors import OutlinesLogitsProcessor

__all__ = ["transformers"]


KVCacheType = Tuple[Tuple["torch.DoubleTensor", "torch.DoubleTensor"], ...]


def get_llama_tokenizer_types():
    """Get all the Llama tokenizer types/classes that need work-arounds.

    When they can't be imported, a dummy class is created.

    """
    try:
        from transformers.models.llama import LlamaTokenizer
    except ImportError:

        class LlamaTokenizer:
            pass

    try:
        from transformers.models.llama import LlamaTokenizerFast
    except ImportError:

        class LlamaTokenizerFast:
            pass

    try:
        from transformers.models.code_llama import CodeLlamaTokenizer
    except ImportError:

        class CodeLlamaTokenizer:
            pass

    try:
        from transformers.models.code_llama import CodeLlamaTokenizerFast
    except ImportError:

        class CodeLlamaTokenizerFast:
            pass

    return (
        LlamaTokenizer,
        LlamaTokenizerFast,
        CodeLlamaTokenizer,
        CodeLlamaTokenizerFast,
    )
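

# The tuple returned above is meant for `isinstance` checks: `TransformerTokenizer`
# below relies on it to detect Llama tokenizers that need a whitespace work-around.
# A minimal sketch, assuming a `tokenizer` object loaded elsewhere:
#
#     is_llama = isinstance(tokenizer, get_llama_tokenizer_types())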


class TransformerTokenizer(Tokenizer):
    """Represents a tokenizer for models in the `transformers` library."""

    def __init__(self, tokenizer: "PreTrainedTokenizer", **kwargs):
        self.tokenizer = tokenizer
        self.eos_token_id = self.tokenizer.eos_token_id
        self.eos_token = self.tokenizer.eos_token

        if self.tokenizer.pad_token_id is None:
            # Fall back to the EOS token when no padding token is defined.
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
            self.pad_token_id = self.eos_token_id
        else:
            self.pad_token_id = self.tokenizer.pad_token_id
            self.pad_token = self.tokenizer.pad_token

        self.special_tokens = set(self.tokenizer.all_special_tokens)

        self.vocabulary = self.tokenizer.get_vocab()
        self.is_llama = isinstance(self.tokenizer, get_llama_tokenizer_types())

    def encode(
        self, prompt: Union[str, List[str]], **kwargs
    ) -> Tuple["torch.LongTensor", "torch.LongTensor"]:
        kwargs["padding"] = True
        kwargs["return_tensors"] = "pt"
        output = self.tokenizer(prompt, **kwargs)
        return output["input_ids"], output["attention_mask"]

    def decode(self, token_ids: "torch.LongTensor") -> List[str]:
        text = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
        return text

    def convert_token_to_string(self, token: str) -> str:
        from transformers.file_utils import SPIECE_UNDERLINE

        string = self.tokenizer.convert_tokens_to_string([token])

        if self.is_llama:
            # A hack to handle missing spaces in HF's Llama tokenizers.
            if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
                return " " + string

        return string

    def __eq__(self, other):
        if isinstance(other, type(self)):
            if hasattr(self, "model_name") and hasattr(self, "kwargs"):
                return (
                    other.model_name == self.model_name and other.kwargs == self.kwargs
                )
            else:
                return other.tokenizer == self.tokenizer
        return NotImplemented

    def __hash__(self):
        from datasets.fingerprint import Hasher

        return hash(Hasher.hash(self.tokenizer))

    def __getstate__(self):
        state = {"tokenizer": self.tokenizer}
        return state

    def __setstate__(self, state):
        self.__init__(state["tokenizer"])
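
# A minimal round-trip sketch, assuming `hf_tokenizer` is a hypothetical
# `AutoTokenizer` instance loaded elsewhere; `encode` always pads and returns
# PyTorch tensors:
#
#     tok = TransformerTokenizer(hf_tokenizer)
#     input_ids, attention_mask = tok.encode(["a prompt", "another, longer prompt"])
#     texts = tok.decode(input_ids)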


class Transformers:
    """Represents a `transformers` model."""

    def __init__(self, model: "PreTrainedModel", tokenizer: "PreTrainedTokenizer"):
        self.model = model
        self.tokenizer = TransformerTokenizer(tokenizer)

    def forward(
        self,
        input_ids: "torch.LongTensor",
        attention_mask: "torch.LongTensor",
        past_key_values: Optional[Tuple] = None,
    ) -> Tuple["torch.FloatTensor", Optional[KVCacheType]]:
        """Compute a forward pass through the transformer model.

        Parameters
        ----------
        input_ids
            The input token ids.  Must be one or two dimensional.
        attention_mask
            The attention mask.  Must be one or two dimensional.
        past_key_values
            A tuple of tuples containing the cached key and value tensors for each
            attention head.

        Returns
        -------
        The computed logits and the new cached key and value tensors.

        """
        try:
            import torch
        except ImportError:
            raise ImportError(
                "The `torch` library needs to be installed to use `transformers` models."
            )
        assert 0 < input_ids.ndim < 3

        if past_key_values:
            input_ids = input_ids[..., -1].unsqueeze(-1)

        with torch.inference_mode():
            output = self.model(
                input_ids,
                attention_mask=attention_mask,
                return_dict=True,
                output_attentions=False,
                output_hidden_states=False,
                past_key_values=past_key_values,
            )

        return output.logits, output.past_key_values

    def __call__(
        self,
        input_ids: "torch.LongTensor",
        attention_mask: "torch.LongTensor",
        past_key_values: Optional[Tuple] = None,
    ) -> Tuple["torch.FloatTensor", Optional[KVCacheType]]:
        logits, kv_cache = self.forward(input_ids, attention_mask, past_key_values)
        # Only the logits of the last position are needed to sample the next token.
        next_token_logits = logits[..., -1, :]

        return next_token_logits, kv_cache

    def generate(
        self,
        prompts: Union[str, List[str]],
        generation_parameters: GenerationParameters,
        logits_processor: Optional["OutlinesLogitsProcessor"],
        sampling_parameters: SamplingParameters,
    ) -> Union[str, List[str], List[List[str]]]:
        """Generate text using `transformers`.

        Arguments
        ---------
        prompts
            A prompt or list of prompts.
        generation_parameters
            An instance of `GenerationParameters` that contains the prompt,
            the maximum number of tokens, stop sequences and seed. All the
            arguments to `SequenceGeneratorAdapter`'s `__call__` method.
        logits_processor
            The logits processor to use when generating text.
        sampling_parameters
            An instance of `SamplingParameters`, a dataclass that contains
            the name of the sampler to use and related parameters as available
            in Outlines.

        Returns
        -------
        The generated text
        """
        if isinstance(prompts, str):
            # convert to 2d
            input_ids, attention_mask = self.tokenizer.encode([prompts])
        else:
            input_ids, attention_mask = self.tokenizer.encode(prompts)

        inputs = {
            "input_ids": input_ids.to(self.model.device),
            "attention_mask": attention_mask.to(self.model.device),
        }
        if (
            "attention_mask"
            not in inspect.signature(self.model.forward).parameters.keys()
        ):
            del inputs["attention_mask"]

        generation_kwargs = self._get_generation_kwargs(
            prompts,
            generation_parameters,
            logits_processor,
            sampling_parameters,
        )
        generated_ids = self._generate_output_seq(prompts, inputs, **generation_kwargs)

        # if single str input and single sample per input, convert to 1D output
        if isinstance(prompts, str):
            generated_ids = generated_ids.squeeze(0)

        return self._decode_generation(generated_ids)

    def stream(
        self,
        prompts: Union[str, List[str]],
        generation_parameters: GenerationParameters,
        logits_processor: Optional["OutlinesLogitsProcessor"],
        sampling_parameters: SamplingParameters,
    ) -> Iterator[Union[str, List[str]]]:
        """
        Temporary stream stand-in which implements stream() signature
        and equivalent behaviour but isn't yielded until generation completes.

        TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810
        """
        if isinstance(prompts, str):
            # convert to 2d
            input_ids, attention_mask = self.tokenizer.encode([prompts])
        else:
            input_ids, attention_mask = self.tokenizer.encode(prompts)
        inputs = {
            "input_ids": input_ids.to(self.model.device),
            "attention_mask": attention_mask.to(self.model.device),
        }
        if (
            "attention_mask"
            not in inspect.signature(self.model.forward).parameters.keys()
        ):
            del inputs["attention_mask"]

        generation_kwargs = self._get_generation_kwargs(
            prompts,
            generation_parameters,
            logits_processor,
            sampling_parameters,
        )
        generated_ids = self._generate_output_seq(prompts, inputs, **generation_kwargs)

        # if single str input and single sample per input, convert to 1D output
        if isinstance(prompts, str):
            generated_ids = generated_ids.squeeze(0)

        for i in range(generated_ids.size(-1)):
            output_group_ids = generated_ids.select(-1, i).unsqueeze(-1)
            yield self._decode_generation(output_group_ids)

    def _get_generation_kwargs(
        self,
        prompts: Union[str, List[str]],
        generation_parameters: GenerationParameters,
        logits_processor: Optional["OutlinesLogitsProcessor"],
        sampling_parameters: SamplingParameters,
    ) -> dict:
        """
        Convert outlines generation parameters into model.generate kwargs
        """
        from transformers import GenerationConfig, LogitsProcessorList, set_seed

        max_new_tokens, stop_at, seed = dataclasses.astuple(generation_parameters)
        sampler, num_samples, top_p, top_k, temperature = dataclasses.astuple(
            sampling_parameters
        )
        if max_new_tokens is None:
            max_new_tokens = int(2**30)

        # global seed, not desirable
        if seed is not None:
            set_seed(seed)

        if logits_processor is not None:
            logits_processor_list = LogitsProcessorList([logits_processor])
        else:
            logits_processor_list = None

        generation_config = GenerationConfig(
            max_new_tokens=max_new_tokens,
            stop_strings=stop_at,
            num_return_sequences=(num_samples or 1),
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            do_sample=(sampler == "multinomial"),
            num_beams=(num_samples if sampler == "beam_search" else 1),
            eos_token_id=self.tokenizer.eos_token_id,
            pad_token_id=self.tokenizer.pad_token_id,
        )

        return dict(
            logits_processor=logits_processor_list,
            generation_config=generation_config,
            tokenizer=self.tokenizer.tokenizer,
        )

    def _generate_output_seq(self, prompts, inputs, **generation_kwargs):
        input_ids = inputs["input_ids"]
        output_ids = self.model.generate(**inputs, **generation_kwargs)

        # encoder-decoder returns output_ids only, decoder-only returns full seq ids
        if self.model.config.is_encoder_decoder:
            generated_ids = output_ids
        else:
            generated_ids = output_ids[:, input_ids.shape[1] :]

        # if batch list inputs AND multiple samples per input, convert to 3D view
        num_samples = generation_kwargs["generation_config"].num_return_sequences or 1
        if num_samples > 1 and isinstance(prompts, list):
            batch_size = input_ids.size(0)
            generated_ids = generated_ids.view(batch_size, num_samples, -1)

        return generated_ids

    def _decode_generation(self, generated_ids: "torch.Tensor"):
        if len(generated_ids.shape) == 1:
            return self.tokenizer.decode([generated_ids])[0]
        elif len(generated_ids.shape) == 2:
            return self.tokenizer.decode(generated_ids)
        elif len(generated_ids.shape) == 3:
            return [
                self.tokenizer.decode(generated_ids[i])
                for i in range(len(generated_ids))
            ]
        else:
            raise TypeError(
                f"Generated outputs aren't 1D, 2D or 3D, but instead are "
                f"{generated_ids.shape}"
            )


def transformers(
    model_name: str,
    device: Optional[str] = None,
    model_kwargs: dict = {},
    tokenizer_kwargs: dict = {},
    model_class=None,
    tokenizer_class=None,
):
    """Instantiate a model from the `transformers` library and its tokenizer.

    Parameters
    ----------
    model_name
        The name of the model as listed on Hugging Face's model page.
    device
        The device(s) on which the model should be loaded. This overrides
        the `device_map` entry in `model_kwargs` when provided.
    model_kwargs
        A dictionary that contains the keyword arguments to pass to the
        `from_pretrained` method when loading the model.
    tokenizer_kwargs
        A dictionary that contains the keyword arguments to pass to the
        `from_pretrained` method when loading the tokenizer.

    Returns
    -------
    A `Transformers` model instance.

    """
    if model_class is None or tokenizer_class is None:
        try:
            from transformers import AutoModelForCausalLM, AutoTokenizer
        except ImportError:
            raise ImportError(
                "The `transformers` library needs to be installed in order to use `transformers` models."
            )
    if model_class is None:
        model_class = AutoModelForCausalLM
    if tokenizer_class is None:
        tokenizer_class = AutoTokenizer

    if device is not None:
        model_kwargs["device_map"] = device

    model = model_class.from_pretrained(model_name, **model_kwargs)

    tokenizer_kwargs.setdefault("padding_side", "left")
    tokenizer = tokenizer_class.from_pretrained(model_name, **tokenizer_kwargs)

    return Transformers(model, tokenizer)
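
# A minimal loading sketch; `device` is forwarded as `device_map`, so it
# overrides any `device_map` entry already present in `model_kwargs`:
#
#     model = transformers("gpt2", device="cuda", model_kwargs={"torch_dtype": "auto"})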


def mamba(
    model_name: str,
    device: Optional[str] = None,
    model_kwargs: dict = {},
    tokenizer_kwargs: dict = {},
):
    try:
        from transformers import MambaForCausalLM
    except ImportError:
        raise ImportError(
            "The `mamba_ssm`, `torch` and `transformers` libraries need to be installed in order to use Mamba."
        )

    return transformers(
        model_name=model_name,
        device=device,
        model_kwargs=model_kwargs,
        tokenizer_kwargs=tokenizer_kwargs,
        model_class=MambaForCausalLM,
    )