o
    ưi                     @   sN   d dl Z d dlmZmZmZmZmZmZ d dlZddl	m
Z
 G dd dZdS )    N)AnyDictIteratorListOptionalUnion   )UnauthorizedErrorc                   @   s2  e Zd Zddedee fddZdeeef fddZ									dd
edeeeef  dee	 dee	 dee
 dee
 dee	 dee	 dee dedeeeef ejf fddZ							dd
edeeeef  dee	 dee	 dee
 dee
 dee	 dee	 dee deeeef  fddZdS )
ChatClientNbase_urlapi_keyc                 C   s   | d| _|| _dS )a  
        Initialize the ChatClient.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:8000")
            api_key (Optional[str]): API key for authentication. If provided, it will be sent as a Bearer token.
        /N)rstrip	_base_url_api_key)selfr   r    r   P/home/app/Keep/.python/lib/python3.10/site-packages/litellm/proxy/client/chat.py__init__
   s   
zChatClient.__init__returnc                 C   s"   ddi}| j rd| j  |d< |S )z
        Get the headers for API requests, including authorization if api_key is set.

        Returns:
            Dict[str, str]: Headers to use for API requests
        zContent-Typezapplication/jsonzBearer Authorization)r   )r   headersr   r   r   _get_headers   s   zChatClient._get_headersFmodelmessagestemperaturetop_pn
max_tokenspresence_penaltyfrequency_penaltyuserreturn_requestc              
   C   s   | j  d}||d}|dur||d< |dur||d< |dur#||d< |dur+||d< |dur3||d< |dur;||d	< |	durC|	|d
< tjd||  |d}|
rR|S t }z|| }|  | W S  tj	j
y } z|jjdkrzt| d}~ww )a  
        Create a chat completion.

        Args:
            model (str): The model to use for completion
            messages (List[Dict[str, str]]): The messages to generate a completion for
            temperature (Optional[float]): Sampling temperature between 0 and 2
            top_p (Optional[float]): Nucleus sampling parameter between 0 and 1
            n (Optional[int]): Number of completions to generate
            max_tokens (Optional[int]): Maximum number of tokens to generate
            presence_penalty (Optional[float]): Presence penalty between -2.0 and 2.0
            frequency_penalty (Optional[float]): Frequency penalty between -2.0 and 2.0
            user (Optional[str]): Unique identifier for the end user
            return_request (bool): If True, returns the prepared request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the completion response from the server or
            a prepared request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        /chat/completions)r   r   Nr   r   r   r   r   r    r!   POST)r   json  )r   requestsRequestr   Sessionsendprepareraise_for_statusr%   
exceptions	HTTPErrorresponsestatus_coder	   )r   r   r   r   r   r   r   r   r    r!   r"   urldatarequestsessionr/   er   r   r   completions!   s<   $

zChatClient.completionsc
              
   c   sZ   | j  d}
||dd}|dur||d< |dur||d< |dur%||d< |dur-||d< |dur5||d	< |dur=||d
< |	durE|	|d< t }zJ|j|
|  |dd}|  | D ]3}|r|d}|dr|dd }|	 dkr{ W dS z
t
|}|V  W q] t
jy   Y q]w q]W dS  tjjy } z|jjdkrt| d}~ww )a  
        Create a streaming chat completion.

        Args:
            model (str): The model to use for completion
            messages (List[Dict[str, str]]): The messages to generate a completion for
            temperature (Optional[float]): Sampling temperature between 0 and 2
            top_p (Optional[float]): Nucleus sampling parameter between 0 and 1
            n (Optional[int]): Number of completions to generate
            max_tokens (Optional[int]): Maximum number of tokens to generate
            presence_penalty (Optional[float]): Presence penalty between -2.0 and 2.0
            frequency_penalty (Optional[float]): Frequency penalty between -2.0 and 2.0
            user (Optional[str]): Unique identifier for the end user

        Yields:
            Dict[str, Any]: Streaming response chunks from the server

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        r#   T)r   r   streamNr   r   r   r   r   r    r!   )r   r%   r7   zutf-8zdata:    z[DONE]r&   )r   r'   r)   postr   r,   
iter_linesdecode
startswithstripr%   loadsJSONDecodeErrorr-   r.   r/   r0   r	   )r   r   r   r   r   r   r   r   r    r!   r1   r2   r4   r/   lineZdata_strchunkr5   r   r   r   completions_streamj   sd   !



zChatClient.completions_stream)N)NNNNNNNF)NNNNNNN)__name__
__module____qualname__strr   r   r   r   r   floatintboolr   r   r'   r(   r6   r   rB   r   r   r   r   r
   	   s|    	

M	
r
   )r%   typingr   r   r   r   r   r   r'   r-   r	   r
   r   r   r   r   <module>   s
     