o
    ưiʟ                  	   @   sx  d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlZddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, erddl-m.Z/ neZ/ddl0m1Z1 ddl2m3Z3 G dd deZ4e Z5G dd dZ6	dde	deeedf  de
e7ef fddZ8dS )a  
This contains LLMCachingHandler

This exposes two methods:
    - async_get_cache
    - async_set_cache

This file is a wrapper around caching.py

This class is used to handle caching logic specific for LLM API requests (completion / embedding / text_completion / transcription etc)

It utilizes the (RedisCache, s3Cache, RedisSemanticCache, QdrantSemanticCache, InMemoryCache, DiskCache) based on what the user has setup

In each method it will call the appropriate method from caching.py
    N)
TYPE_CHECKINGAnyAsyncGeneratorCallableDict	GeneratorListOptionalTupleUnion)	BaseModel)print_verboseverbose_logger)InMemoryCache)S3Cache)update_response_metadata)1_assemble_complete_response_from_streaming_chunks)CachedEmbedding)ResponsesAPIResponse)RerankResponse)CachingDetails	CallTypes	EmbeddingEmbeddingResponseModelResponseTextCompletionResponseTranscriptionResponseUsage)Logging!_get_parent_otel_span_from_kwargsCustomStreamWrapperc                   @   s>   e Zd ZU dZdZee ed< dZee	 ed< dZ
eed< dS )CachingHandlerResponsea  
    This is the response object for the caching handler. We need to separate embedding cached responses and (completion / text_completion / transcription) cached responses

    For embeddings there can be a cache hit for some of the inputs in the list and a cache miss for others
    Ncached_resultfinal_embedding_cached_responseF embedding_all_elements_cache_hit)__name__
__module____qualname____doc__r$   r	   r   __annotations__r%   r   r&   bool r-   r-   V/home/app/Keep/.python/lib/python3.10/site-packages/litellm/caching/caching_handler.pyr#   H   s   
 r#   c                   @   s*  e Zd Zdedeeef dejfddZ	dBdedede	dejd	ed
eeef de
eedf  de
e fddZ	dBdedede	dejd	ed
eeef de
eedf  defddZd
eeef dee fddZdeeeef  de
e fddZde
e dee
e  d
eeef de	dejdedee
e ef fddZdededefddZded edejd!ejdef
d"d#Zde	dedejd!ejd$ef
d%d&Zd	ed
eeef deedf de
e fd'd(Z	dBded	ed
eeef de	dedeedf d)e
e de
eeeeee e!f  fd*d+Z"ded	ede	dede!f
d,d-Z#	dBd.eded
eeef de
eedf  fd/d0Z$	dBd.ed
eeef de
eedf  fd1d2Z%ded
eeef defd3d4Z&dedefd5d6Z'd7efd8d9Z(d7efd:d;Z)	<		dCde	ded
eeef ded=ed>ed)e
e d?e
e* fd@dAZ+dS )DLLMCachingHandleroriginal_functionrequest_kwargs
start_timec                 C   sf   ddl m}m} g | _g | _|| _|| _|| _tj	d ur.t
tj	j	|r.|tj	j	td| _d S d | _d S )Nr   )	DualCache
RedisCache)redis_cacheZin_memory_cache)litellm.cachingr3   r4   async_streaming_chunkssync_streaming_chunksr1   r0   r2   litellmcache
isinstancein_memory_cache_obj
dual_cache)selfr0   r1   r2   r3   r4   r-   r-   r.   __init__Z   s   zLLMCachingHandler.__init__Nmodellogging_obj	call_typekwargsargs.returnc              
      sV  | dddu rtjdus| dddu r)| di  dddur)|p'd}d}d}	d}
| }t }d}t|}||d< tjdur| j|d	rt	d
 | j
|||dI dH }
t }|
durt|
tst	d d}tj }tj|| dd| dd| ddd\}}}}|| d }| j||||
d||d |j}| j|
||||| dd|d}
| dddu r| j||
|||d tjjdi |}t|
tst|
trt|
dr||
jd< t|
dS |tjjkr|
durt|
trtjdurttjjts| j||
||||d\}}	t||	dS t	d|
  t|
|dS dS )aC  
        Internal method to get from the cache.
        Handles different call types (embeddings, chat/completions, text_completion, transcription)
        and accordingly returns the cached response

        Args:
            model: str:
            original_function: Callable:
            logging_obj: LiteLLMLoggingObj:
            start_time: datetime.datetime:
            call_type: str:
            kwargs: Dict[str, Any]:
            args: Optional[Tuple[Any, ...]] = None:


        Returns:
            CachingHandlerResponse:
        Raises:
            None
        ZcachingNFTr:   zno-cacher-   parent_otel_spanr0   zChecking Async Cache)rB   rC   rD   z
Cache Hit!custom_llm_providerapi_baseapi_keyr@   rH   rI   rJ     )rA   r@   rC   r$   is_asyncrH   cache_duration_msr$   rB   rC   rA   r@   rH   rD   streamrA   r$   r2   end_time	cache_hit_hidden_params	cache_keyr$   )r%   r$   rC   rA   r2   r@   )r%   r&   zCACHE RESULT: )r$   r%   )getr9   r:   copytimeperf_counterr     _is_call_type_supported_by_cacher   debug_retrieve_from_cacher;   listdatetimenowget_llm_provider'_update_litellm_logging_obj_environmentr'   (_convert_cached_result_to_model_response!_async_log_cache_hit_on_callbacksget_cache_keyr   r"   hasattrrT   r#   r   
aembeddingvaluer   (_process_async_embedding_cached_response)r>   r@   r0   rA   r2   rB   rC   rD   r%   r&   r$   Zcache_check_start_timeZcache_check_end_timerF   rS   rR   rH   _rN   rU   r-   r-   r.   _async_get_cachep   s    







	


	z"LLMCachingHandler._async_get_cachec              
   C   s\  ddl m} d }	tjd ur| j|dr|pd}| }
|
t| j| t	d tjj
di |
}	|	d urd|	v r:no|j}| j|	|||||dd |d}	d	}tj }tj|pYd
|dd |dd |dd d\}}}}| j|| d| ||	dd |j|	|||d tjjdi |}t|	tst|	|rt|	dr||	jd< t|	dS t|	dS )Nr   r!   rG   r-   zChecking Sync CachedetailrH   rO   T rI   rJ   rK   /F)rA   r@   rC   r$   rM   resultr2   rR   rS   rT   rU   rV   )litellm.utilsr"   r9   r:   r[   rX   updateconvert_args_to_kwargsr0   r   	get_cacher'   rc   rW   r_   r`   ra   rb   -handle_sync_success_callbacks_for_async_callsre   r;   r   rf   rT   r#   )r>   r@   r0   rA   r2   rB   rC   rD   r"   r$   
new_kwargsrS   rR   rH   dynamic_api_keyrI   rU   r-   r-   r.   _sync_get_cache   s   








z!LLMCachingHandler._sync_get_cachec                 C   s6   t |d tr|d gS t |d tr|d S td)zO
        Handles the input of kwargs['input'] being a list or a string
        input input must be a string or a list)r;   strr^   
ValueError)r>   rC   r-   r-   r.   handle_kwargs_input_list_or_strS  s
   
z1LLMCachingHandler.handle_kwargs_input_list_or_strnon_null_listc                 C   s2   |D ]\}}t |tr|dr|d   S qdS )a	  
        Helper method to extract the model name from cached results.

        Args:
            non_null_list: List of (idx, cr) tuples where cr is the cached result dict

        Returns:
            Optional[str]: The model name if found, None otherwise
        r@   N)r;   dictrW   )r>   r~   rj   crr-   r-   r.   "_extract_model_from_cached_results^  s
   z4LLMCachingHandler._extract_model_from_cached_resultsr%   r$   c                 C   s  d}g }g }	|  |}
t|D ]\}}|du r||
|  q|	||f q||d< t|	dkr| |	}|s=|d}t|dgt|
 d}d|jd< d}|	D ]4}|\}}|dur|d	}|durnt||d	d
|j	|< t
|
| trddlm} |||
| dd7 }qQt|d|d}||_t|dkrd}d}tj }tj||dd|dd|ddd\}}}}| j||||ddd | j|||||d ||fS ||fS )a  
        Returns the final embedding cached response and a boolean indicating if all elements in the list have a cache hit

        For embedding responses, there can be a cache hit for some of the inputs in the list and a cache miss for others
        This function processes the cached embedding responses and returns the final embedding cached response and a boolean indicating if all elements in the list have a cache hit

        Args:
            final_embedding_cached_response: Optional[EmbeddingResponse]:
            cached_result: List[Optional[Dict[str, Any]]]:
            kwargs: Dict[str, Any]:
            logging_obj: LiteLLMLoggingObj:
            start_time: datetime.datetime:
            model: str:

        Returns:
            Tuple[Optional[EmbeddingResponse], bool]:
            Returns the final embedding cached response and a boolean indicating if all elements in the list have a cache hit


        FNry   r   r@   )r@   dataTrS   	embedding)r   indexobject)token_counter)textZcount_response_tokensprompt_tokenscompletion_tokenstotal_tokensrH   rI   rJ   rK   )rA   r@   rC   r$   rM   is_embeddingrQ   )r}   	enumerateappendlenr   rW   r   rT   r   r   r;   r{   rq   r   r   usager_   r`   r9   ra   rb   rd   )r>   r%   r$   rC   rA   r2   r@   r&   Zremaining_listr~   Zkwargs_input_as_listidxr   Z
model_namer   valZembedding_datar   r   rS   rR   rH   rw   rI   r-   r-   r.   ri   o  s   








z:LLMCachingHandler._process_async_embedding_cached_responseusage1usage2c                 C   s&   t |j|j |j|j |j|j dS )Nr   )r   r   r   r   )r>   r   r   r-   r-   r.   combine_usage  s
   


zLLMCachingHandler.combine_usage_caching_handler_responseembedding_responserR   c                 C   s   |j du r|S d}g }|j jD ]}|du r'|jdur'||j|  |d7 }q|| q||j _d|j jd< ||  d |j _|j jdurX|jdurX| j|j j|jd|j _|j S )a  
        Combines the cached embedding response with the API EmbeddingResponse

        For caching there can be a cache hit for some of the inputs in the list and a cache miss for others
        This function combines the cached embedding response with the API EmbeddingResponse

        Args:
            caching_handler_response: CachingHandlerResponse:
            embedding_response: EmbeddingResponse:

        Returns:
            EmbeddingResponse:
        Nr      TrS   rL   )r   r   )r%   r   r   rT   total_secondsZ_response_msr   r   )r>   r   r   r2   rR   r   Zfinal_data_listitemr-   r-   r.   2_combine_cached_embedding_response_with_api_result  s0   



zDLLMCachingHandler._combine_cached_embedding_response_with_api_resultrS   c                 C   s<   ddl m} |j|j||||dd |j||||d dS )a  
        Helper function to log the success of a cached result on callbacks

        Args:
            logging_obj (LiteLLMLoggingObj): The logging object.
            cached_result: The cached result.
            start_time (datetime): The start time of the operation.
            end_time (datetime): The end time of the operation.
            cache_hit (bool): Whether it was a cache hit.
        r   )GLOBAL_LOGGING_WORKERro   )Zasync_coroutineN)Z)litellm.litellm_core_utils.logging_workerr   Zensure_initialized_and_enqueueZasync_success_handlerru   )r>   rA   r$   r2   rR   rS   r   r-   r-   r.   rd     s   
z3LLMCachingHandler._async_log_cache_hit_on_callbacksc           
         sF  t jdu rdS | }|t| j| d}|tjjkr}t	|d t
r,|d g|d< nt	|d ts7tdg }t|d D ]\}}t jjd	i i |d|i}	|t jj|	| jd q?tj| I dH }|dur{t	|tr{tdd |D r{d}|S t j du rt jjd	d| ji|I dH }|S t jjd	d| ji|}|S )
a  
        Internal method to
        - get cache key
        - check what type of cache is used - Redis, RedisSemantic, Qdrant, S3
        - async get cache value
        - return the cached value

        Args:
            call_type: str:
            kwargs: Dict[str, Any]:
            args: Optional[Tuple[Any, ...]] = None:

        Returns:
            Optional[Any]:
        Raises:
            None
        Nry   rz   )rU   dynamic_cache_objectc                 s   s    | ]}|d u V  qd S Nr-   ).0rp   r-   r-   r.   	<genexpr>h  s    z9LLMCachingHandler._retrieve_from_cache.<locals>.<genexpr>Tr   r-   )r9   r:   rX   rr   rs   r0   r   rg   rh   r;   r{   r^   r|   r   re   r   Zasync_get_cacher=   asynciogatherallZ_supports_asyncrt   )
r>   rB   rC   rD   rv   r$   tasksr   ipreset_cache_keyr-   r-   r.   r]   5  sZ   



z&LLMCachingHandler._retrieve_from_cacherH   c           
      C   s  ddl m} |tjjks|tjjkr0t|tr0|dddu r)| j	||||d}n||t
 d}|tjjks<|tjjkr[t|tr[|dddu rS| j	||||d}nptdi |}nh|tjjksg|tjjkrut|tru||t dd	}nN|tjjks|tjjkrt|tr||d
dd	}n5|tjjks|tjjkrt|trd|dd}	||t d|	d}n|dks|dkrt|trtdi |}t|dr|jd
urt|jtrd|jd< t||||| jtj d |S )aH  
        Internal method to process the cached result

        Checks the call type and converts the cached result to the appropriate model response object
        example if call type is text_completion -> returns TextCompletionResponse object

        Args:
            cached_result: Any:
            call_type: str:
            kwargs: Dict[str, Any]:
            logging_obj: LiteLLMLoggingObj:
            model: str:
            custom_llm_provider: Optional[str] = None:
            args: Optional[Tuple[Any, ...]] = None:

        Returns:
            Optional[Any]:
        r   ) convert_to_model_response_objectrP   FT)r$   rB   rA   r@   )response_objectmodel_response_objectr   )r   r   response_typeNrerankz	whisper-1)r@   rH   rS   Zaudio_transcription)r   r   r   hidden_paramsZ
aresponses	responsesrT   rS   )rp   rA   r@   rC   r2   rR   r-   )rq   r   r   acompletionrh   
completionr;   r   rW   _convert_cached_stream_responser   atext_completionZtext_completionr   rg   r   r   Zarerankr   ZatranscriptionZtranscriptionr   r   rf   rT   r   r2   r_   r`   )
r>   r$   rB   rC   rA   r@   rD   rH   r   r   r-   r-   r.   rc   v  s   %


z:LLMCachingHandler._convert_cached_result_to_model_responsec           	      C   sR   ddl m}m}m} |tjjks|tjjkr||d}n||d}|||d|dS )Nr   )r"   convert_to_streaming_response#convert_to_streaming_response_async)r   cached_response)Zcompletion_streamr@   rH   rA   )rq   r"   r   r   r   r   rh   r   )	r>   r$   rB   rA   r@   r"   r   r   Z_stream_cached_resultr-   r-   r.   r     s   z1LLMCachingHandler._convert_cached_stream_responserp   c                    s  ddl m} tjdu rdS | }|t|| ||}||d< | j||drt|tj	sDt|tj
sDt|tsDt|tsDt|tr{t|t
rgtjdurgttjjtsgttjj|fd| ji| dS ttjj| fd| ji| dS ttjj|fi | dS dS )ah  
        Internal method to check the type of the result & cache used and adds the result to the cache accordingly

        Args:
            result: Any:
            original_function: Callable:
            kwargs: Dict[str, Any]:
            args: Optional[Tuple[Any, ...]] = None:

        Returns:
            None
        Raises:
            None
        r   r   NrF   r0   rC   r   )'litellm.litellm_core_utils.core_helpersr    r9   r:   rX   rr   rs   _should_store_result_in_cacher;   r   r   r   r   r   r   r   create_taskZasync_add_cache_pipeliner=   Zasync_add_cacheZmodel_dump_json)r>   rp   r0   rC   rD   r    rv   rF   r-   r-   r.   async_set_cache  sj   



z!LLMCachingHandler.async_set_cachec                 C   sP   |  }|t| j| tjdu rdS | j| j|dr&tjj|fi | dS )zE
        Sync internal method to add the result to the cache
        Nr   )rX   rr   rs   r0   r9   r:   r   Z	add_cache)r>   rp   rC   rD   rv   r-   r-   r.   sync_set_cache]  s   

z LLMCachingHandler.sync_set_cachec                 C   s@   t jduot jjduot|jt jjv o|di ddduS )z
        Helper function to determine if the result should be stored in the cache.

        Returns:
            bool: True if the result should be stored in the cache, False otherwise.
        Nr:   zno-storeFT)r9   r:   supported_call_typesr{   r'   rW   )r>   r0   rC   r-   r-   r.   r   x  s   


z/LLMCachingHandler._should_store_result_in_cachec                 C   s0   t jdurt jjdurt|jt jjv rdS dS )aO  
        Helper function to determine if the call type is supported by the cache.

        call types are acompletion, aembedding, atext_completion, atranscription, arerank

        Defined on `litellm.types.utils.CallTypes`

        Returns:
            bool: True if the call type is supported by the cache, False otherwise.
        NTF)r9   r:   r   r{   r'   )r>   r0   r-   r-   r.   r[     s
   
z2LLMCachingHandler._is_call_type_supported_by_cacheprocessed_chunkc                    sL   t || jtj | j| jdd}|dur$| j|| j| jdI dH  dS dS )z
        Internal method to add the streaming response to the cache


        - If 'streaming_chunk' has a 'finish_reason' then assemble a litellm.ModelResponse object
        - Else append the chunk to self.async_streaming_chunks

        Trp   r2   rR   r1   Zstreaming_chunksrM   N)rp   r0   rC   )r   r2   r_   r`   r1   r7   r   r0   r>   r   Zcomplete_streaming_responser-   r-   r.    _add_streaming_response_to_cache  s"   z2LLMCachingHandler._add_streaming_response_to_cachec                 C   s@   t || jtj | j| jdd}|dur| j|| jd dS dS )zQ
        Sync internal method to add the streaming response to the cache
        Fr   N)rp   rC   )r   r2   r_   r`   r1   r8   r   r   r-   r-   r.   %_sync_add_streaming_response_to_cache  s   
z7LLMCachingHandler._sync_add_streaming_response_to_cacheFrM   r   rN   c	           
      C   s   | dd|| dd| di | di | dd| di |d	}	tjdur4tjjdi ||	d
< nd|	d
< |j|| ddi |	|sJ| ddn| dd| ddt|d| dd|d
 td|d|_dS )a  
        Helper function to update the LiteLLMLoggingObj environment variables.

        Args:
            logging_obj (LiteLLMLoggingObj): The logging object to update.
            model (str): The model being used.
            kwargs (Dict[str, Any]): The keyword arguments from the original function call.
            cached_result (Any): The cached result to log.
            is_async (bool): Whether the call is asynchronous or not.
            is_embedding (bool): Whether the call is for embeddings or not.
            custom_llm_provider (Optional[str]): The custom llm provider being used.

        Returns:
            None
        	logger_fnNrI   rm   metadata
model_infoproxy_server_requeststream_response)r   r   rI   r   r   r   r   rH   r   usermessagesry   rJ   rP   F)
r@   r   Zoptional_paramslitellm_paramsry   rJ   original_responseZadditional_argsrP   rH   T)rS   rN   r-   )rW   r9   r:   Z!_get_preset_cache_key_from_kwargsZupdate_environment_variablesr{   r   Zcaching_details)
r>   rA   r@   rC   r$   rM   r   rH   rN   r   r-   r-   r.   rb     s>   










z9LLMCachingHandler._update_litellm_logging_obj_environmentr   )FNN),r'   r(   r)   r   r   r{   r   r_   r?   LiteLLMLoggingObjr	   r
   r#   rk   rx   r   r}   intr   r   r   r,   ri   r   r   r   rd   r]   r   r   r   r   r   r"   rc   r   r   r   r   r[   r   r   floatrb   r-   r-   r-   r.   r/   Y   s|   


	
 
	
S



m
4



I

	
 
%

K





	r/   r0   rD   .rE   c                 C   sR   t | }t|j }i }|r't|D ]\}}|t|k r&|| }|||< q|S r   )inspect	signaturer^   
parameterskeysr   r   )r0   rD   r   Zparam_namesZargs_to_kwargsr   arg
param_namer-   r-   r.   rs     s   
rs   r   )9r*   r   r_   r   rY   typingr   r   r   r   r   r   r   r	   r
   r   Zpydanticr   r9   Zlitellm._loggingr   r   r6   r   Zlitellm.caching.cachingr   Z?litellm.litellm_core_utils.llm_response_utils.response_metadatar   Z(litellm.litellm_core_utils.logging_utilsr   Zlitellm.types.cachingr   Zlitellm.types.llms.openair   Zlitellm.types.rerankr   Zlitellm.types.utilsr   r   r   r   r   r   r   r   Z*litellm.litellm_core_utils.litellm_loggingr   r   r   r    Z,litellm.litellm_core_utils.streaming_handlerr"   r#   r<   r/   r{   rs   r-   r-   r-   r.   <module>   sP    0(       C
