o
    o i!                     @   s   d dl Z d dlmZmZmZmZmZmZmZ ddl	m
Z
 er>d dlmZ d dlmZ d dl	mZ d dlmZmZ d dlmZ G dd	 d	Zi i dd
fdedededee def
ddZdS )    N)TYPE_CHECKING	GeneratorIteratorListOptionalTupleUnion   )TransformerTokenizer)PreTrainedTokenizer)GenerationParametersSamplingParameters)OutlinesLogitsProcessorc                   @   s   e Zd ZdZ				dddZdeeee f d	d
dddefddZdeeee f d	d
ddde	e fddZ
dddee dee dedddeeeef ddf fddZdS )MLXLMz&
    Represents an `mlx_lm` model
    model	nn.Module	tokenizerr   c                 C   s   || _ || _t|j| _d S )N)r   mlx_tokenizerr
   Z
_tokenizerr   )selfr   r    r   a/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/outlines/models/mlxlm.py__init__   s
   
zMLXLM.__init__promptsgeneration_parametersr   sampling_parametersr   returnc                 C   s   |  ||||}dt|S )N )streamjoinlist)r   r   r   logits_processorr   streamerr   r   r   generate   s   zMLXLM.generatec                 c   s4   ddl m} t|\}}}t|\}	}
}}}|du r!td}t|ts*td|	dkr2td|
dkr:td|durBtd	|durJtd
|durRtd|||	|d}|| j	
|}| j	j}|  t| j|fi |t|D ]\\}}}|| jjkr n
|| |jV  qx|  |jV  dS )a  Generate text using `mlx_lm`.

        Arguments
        ---------
        prompts
            A prompt or list of prompts.
        generation_parameters
            An instance of `GenerationParameters` that contains the prompt,
            the maximum number of tokens, stop sequences and seed. All the
            arguments to `SequenceGeneratorAdapter`'s `__cal__` method.
        logits_processor
            The logits processor to use when generating text.
        sampling_parameters
            An instance of `SamplingParameters`, a dataclass that contains
            the name of the sampler to use and related parameters as available
            in Outlines.
        Returns
        -------
        The generated text.
        r   Ng    eAz6The `mlx-lm` library does not support batch inference.Zbeam_searchz2The `mlx-lm` library does not support Beam Search.r	   z<The `mlx-lm` library does not allow to take several samples.z,The `mlx-lm` library does not support top_k.z+The `mlx-lm` library does not support seed.z.The `mlx-lm` library does not support stop_at.)temptop_psamplerr    )mlx.corecoredataclassesastupleint
isinstancestrNotImplementedErrorarrayr   encodedetokenizerresetzipgenerate_stepranger   Zeos_token_idZ	add_tokenlast_segmentfinalize)r   r   r   r    r   mxZ
max_tokensZstop_atseedr%   Znum_samplesr$   Ztop_ktemperatureZgenerate_kwargsZprompt_tokensr0   tokenprobnr   r   r   r   +   sX   
	

zMLXLM.streampromptmx.arrayr#   r$   r%   r    r   Nc                 #   s    ddl m ddl |pddddtdtf f fdd} jj| j}|}g }		 | j|d |d
}
|
dddddf }
|durW|
	d}||	|}|	dd}
||
\}}|
 }||fV  |	| |}q/)a  
        Adapted from
        https://github.com/ml-explore/mlx-examples/blob/4872727/llms/mlx_lm/utils.py#L129

        A generator producing token ids based on the given prompt from the model.

            Args:
                prompt (mx.array): The input prompt.
                temp (float): The temperature for sampling, if 0 the argmax is used.
                  Default: ``0``.
                top_p (float, optional): Nulceus sampling, higher means model considers
                  more less likely words.
                sampler (str): The sampler string defined by SequenceGeneratorAdapter
                logits_processor (OutlinesLogitsProcessor): Augment logits before sampling.
        r   N      ?logitsr>   r   c                    s    | }dksdkrj| dd}n,dkr9d ur.dkr.dk r. j| }nj| d  }ntd	 d
|d|f }||fS )Ng        Zgreedy)ZaxisZmultinomialr   r?   r	   zInvalid mlx-lm sampler: ``)ZsoftmaxZargmaxZsample_utilsZtop_p_samplingrandomZcategorical
ValueError)r@   Zsoftmax_logitsr:   r;   mlx_lmr7   r%   r9   r$   r   r   sample   s   
z#MLXLM.generate_step.<locals>.sampleT)cacherA   r	   )r&   r'   rF   r   floatmodelsrH   Zmake_prompt_cacher   Zreshapeitemappend)r   r=   r#   r$   r%   r    rG   rH   Zunprocessed_input_idsZgenerated_idsr@   Z	logits_1dZnew_token_singler;   Z	new_tokenr   rE   r   r3   |   s*   &



zMLXLM.generate_step)r   r   r   r   )__name__
__module____qualname____doc__r   r   r,   r   r"   r   r   r   rI   r   r   r*   r3   r   r   r   r   r      sL    


Qr   F
model_nametokenizer_configmodel_configadapter_pathlazyc           	      C   sd   zddl m} ddl}W n ty   tdw |j s!td|j| ||||d\}}t||S )a  Instantiate a model from the `mlx_lm` library and its tokenizer.

    Signature adapted from
    https://github.com/ml-explore/mlx-examples/blob/4872727/llms/mlx_lm/utils.py#L422

    Parameters
    ----------
    Args:
        path_or_hf_repo (Path): The path or the huggingface repository to load the model from.
        tokenizer_config (dict, optional): Configuration parameters specifically for the tokenizer.
            Defaults to an empty dictionary.
        model_config(dict, optional): Configuration parameters specifically for the model.
            Defaults to an empty dictionary.
        adapter_path (str, optional): Path to the LoRA adapters. If provided, applies LoRA layers
            to the model. Default: ``None``.
        lazy (bool): If False eval the model parameters to make sure they are
            loaded in memory before returning, otherwise they will be loaded
            when needed. Default: ``False``

    Returns
    -------
    A `MLXLM` model instance.

    r   NzKThe `mlx_lm` library needs to be installed in order to use `mlx_lm` models.z5You cannot use `mlx_lm` without Apple Silicon (Metal))rR   rS   rT   rU   )	r&   r'   rF   ImportErrorZmetalZis_availableRuntimeErrorloadr   )	rQ   rR   rS   rT   rU   r7   rF   r   r   r   r   r   mlxlm   s$   


rY   )r(   typingr   r   r   r   r   r   r   Ztransformersr
   r&   r'   r7   Zmlx.nnnnr   Zoutlines.generate.apir   r   Zoutlines.processorsr   r   r,   dictboolrY   r   r   r   r   <module>   s4    $ 6