o
    ưi*                  "   @   s
  d dl mZmZmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZ ddlmZ g dddddddddddddddd	fd
ededee dee dee dee dee dee dee dee dee dee dee dee dee dee f ddZdd Zdd ZdS )     )FIRST_COMPLETEDThreadPoolExecutorwait)ListOptionalN)print_verbose)get_optional_params   )handleriX  d   modelmessages	functionsfunction_calltemperaturetop_pnstream
max_tokenspresence_penaltyfrequency_penalty
logit_biasuserrequest_timeouttimeoutmax_workersc           !      K   s  t  }|}g }| } d}| ddd tjv r&| ddd }| ddd } |dkrJt||||||p3d||	|
|||| |d}tj| |tj|d}|S d	d
 }t|d>}||dD ]0}|D ]+}|	 }|
d ||d< i }d|v rw|
d}|jtjfi ||}|| q]qYW d   n1 sw   Y  g }|D ]"}z	||  W q ty }  z||  W Y d} ~ qd} ~ ww |S )a   
    Batch litellm.completion function for a given model.

    Args:
        model (str): The model to use for generating completions.
        messages (List, optional): List of messages to use as input for generating completions. Defaults to [].
        functions (List, optional): List of functions to use as input for generating completions. Defaults to [].
        function_call (str, optional): The function call to use as input for generating completions. Defaults to "".
        temperature (float, optional): The temperature parameter for generating completions. Defaults to None.
        top_p (float, optional): The top-p parameter for generating completions. Defaults to None.
        n (int, optional): The number of completions to generate. Defaults to None.
        stream (bool, optional): Whether to stream completions or not. Defaults to None.
        stop (optional): The stop parameter for generating completions. Defaults to None.
        max_tokens (float, optional): The maximum number of tokens to generate. Defaults to None.
        presence_penalty (float, optional): The presence penalty for generating completions. Defaults to None.
        frequency_penalty (float, optional): The frequency penalty for generating completions. Defaults to None.
        logit_bias (dict, optional): The logit bias for generating completions. Defaults to {}.
        user (str, optional): The user string for generating completions. Defaults to "".
        deployment_id (optional): The deployment ID for generating completions. Defaults to None.
        request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
        max_workers (int,optional): The maximum number of threads to use for parallel processing.

    Returns:
        list: A list of completion results.
    N/   r   ZvllmF)r   r   r   r   r   r   stopr   r   r   r   r   r   custom_llm_provider)r   r   custom_prompt_dictoptional_paramsc                 s   s.    t dt| |D ]}| |||  V  q	dS )z)Yield successive n-sized chunks from lst.r   N)rangelen)lstr   i r&   T/home/app/Keep/.python/lib/python3.10/site-packages/litellm/batch_completion/main.pychunks_   s   z batch_completion.<locals>.chunksr   r   r   r   kwargs)localssplitlitellmZprovider_listr   vllm_handlerZbatch_completionsr    r   copypopsubmit
completionappendresult	Exception)!r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zdeployment_idr   r   r   r*   argsZbatch_messagesZcompletionsr   r!   resultsr(   executorZ	sub_batchZmessage_listZkwargs_modifiedZoriginal_kwargsfutureexcr&   r&   r'   batch_completion   s|   0&

r;   c               
      s  d|v r	| d d|v rl|d  | d i }tt dC} D ]}|jtjg| R d|i|||< q"t|  fdddD ]\}}| durY|   W  d   S qBW d   dS 1 sew   Y  dS d|v r@|d }| d | d	 | d
i }i }tt|d}|D ]'}|	 D ]}	|	|vr||	 ||	< qi ||}|jtjfi |||d < q|r/t
d t| td\}
}t
d|
  |
D ]I}z| }|W   W  d   S  ty   t
d t
| i }| D ]\}	}||krt
d|	  q|||	< q|}t
d|  Y qw t
d t
| |sW d   dS W d   dS 1 s;w   Y  dS )a  
    Send a request to multiple language models concurrently and return the response
    as soon as one of the models responds.

    Args:
        *args: Variable-length positional arguments passed to the completion function.
        **kwargs: Additional keyword arguments:
            - models (str or list of str): The language models to send requests to.
            - Other keyword arguments to be passed to the completion function.

    Returns:
        str or None: The response from one of the language models, or None if no response is received.

    Note:
        This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
        It sends requests concurrently and returns the response from the first model that responds.
    r   modelsr)   c                    s     | d S )Nr   )index)xr<   r&   r'   <lambda>   s    z)batch_completion_models.<locals>.<lambda>)keyNdeploymentsZ
model_listr*   z

 waiting for next result

)return_whenz
done list
z3

got an exception, ignoring, removing from futureszremoving keyznew futuresz 

done looping through futures

)r0   r   r#   r1   r-   r2   sorteditemsr4   keysr   r   valuesr   r5   )r6   r*   futuresr8   r   r9   rB   Znested_kwargsZ
deploymentrA   done_r4   Znew_futuresvaluer&   r?   r'   batch_completion_models   s   


	
::





)
))rL   c            	         s(  ddl }dv rd dv rd}ntdt|tr$|g}nt|ttfr0t|}ntdt|dkr<g S g }|j	j
t|dC fdd	|D }|D ],}z| }|dure|| W qU ty } ztd
t|  W Y d}~qUd}~ww W d   |S 1 sw   Y  |S )a  
    Send a request to multiple language models concurrently and return a list of responses
    from all models that respond.

    Args:
        *args: Variable-length positional arguments passed to the completion function.
        **kwargs: Additional keyword arguments:
            - models (str or list of str): The language models to send requests to.
            - Other keyword arguments to be passed to the completion function.

    Returns:
        list: A list of responses from the language models that responded.

    Note:
        This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
        It sends requests concurrently and collects responses from all models that respond.
    r   Nr   r<   z'models' param not in kwargsz,'models' must be a string or list of stringsr)   c                    s*   g | ]}j tjg R d |iqS )r   )r1   r-   r2   ).0r   r6   r8   r*   r&   r'   
<listcomp>  s    z9batch_completion_models_all_responses.<locals>.<listcomp>z=batch_completion_models_all_responses: model request failed: )concurrent.futuresr0   r5   
isinstancestrlisttuple	TypeErrorr#   rH   r   r4   r3   r   )	r6   r*   
concurrentr<   	responsesrH   r9   r4   er&   rN   r'   %batch_completion_models_all_responses   sH   




rY   )rP   r   r   r   typingr   r   r-   Zlitellm._loggingr   Zlitellm.utilsr   Zllms.vllm.completionr
   r.   rR   floatintbooldictr;   rL   rY   r&   r&   r&   r'   <module>   st    	
vV