o
    ưi                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlT d d	lmZmZ d
dlmZ d
dlmZ d
dlm Z  d
dl!m"Z" d
dl#m$Z$ d
dl%m&Z& d
dl'm(Z( d
dl)m*Z* d
dl+m,Z, d
dl-m.Z. d
dl/m0Z0 dd Z1G dd de2eZ3G dd dZ4e5j6dddg dfdee5 dee2 dee2 d ee2 d!ee
e7  f
d"d#Z8e5j6dddg dfdee5 dee2 dee2 d ee2 d!ee
e7  f
d$d%Z9d&d' Z:dS )(    N)Enum)AnyDictListOptionalTupleUnion)	BaseModel)verbose_logger)CACHED_STREAMING_CHUNK_DELAY)ModelParamHelper)*)EmbeddingResponseall_litellm_params   )AzureBlobCache)	BaseCache)	DiskCache)	DualCache)GCSCache)InMemoryCache)QdrantSemanticCache)
RedisCache)RedisClusterCache)RedisSemanticCache)S3Cachec                 C   s:   zt |  tjrt|  W d S W d S  ty   Y d S w N)r
   debuglitellmZset_verboseprint	Exception)Zprint_statement r!   N/home/app/Keep/.python/lib/python3.10/site-packages/litellm/caching/caching.pyprint_verbose(   s   
r#   c                   @   s   e Zd ZdZdZdS )	CacheMode
default_ondefault_offN)__name__
__module____qualname__r%   r&   r!   r!   r!   r"   r$   1   s    r$   c                R   @   sn  e Zd Zejejddddddddg ddddddddddddddddddddddddddddddf(dee dee dee dee d	ee d
ee dee	 dee	 dee	 dee	 dee
e  dee dee dee dee dee dee deeeef  dee dee dee dee dee dee dee dee dee d ed!ee d"ee d#ee
 d$ee d%ee d&ee d'ee d(ee d)ed*ee d+ee d,ee fPd-d.Zd/efd0d1Zd2ed3ed/ee fd4d5Zd3ed/efd6d7Zd8ed9ee d/ee fd:d;Zd3ed/efd<d=Zd/ee fd>d?Zd@ed/dfdAdBZedCed/efdDdEZdFed/efdGdHZdIdJ ZdKee dLee	 fdMdNZdrdOee fdPdQZ	drdOee fdRdSZ dTdU Z!dVdW Z"	drdOee fdXdYZ#dZed[ee d/e$fd\d]Z%	^dsd_e&d`ed3edaed/e'eeef f
dbdcZ(	drdOee fdddeZ)dfdg Z*dhdi Z+djdk Z,dldm Z-dndo Z.d/efdpdqZ/dS )tCacheN
completionZacompletion	embeddingZ
aembeddingZatranscriptionZtranscriptionZatext_completionZtext_completionZarerankZrerank	responsesZ
aresponsesTztext-embedding-ada-002typemodehostportpassword	namespacettldefault_in_memory_ttldefault_in_redis_ttlsimilarity_thresholdsupported_call_typesazure_account_urlazure_blob_containers3_bucket_names3_region_names3_api_version
s3_use_ssl	s3_verifys3_endpoint_urls3_aws_access_key_ids3_aws_secret_access_keys3_aws_session_token	s3_configs3_pathgcs_bucket_namegcs_path_service_accountgcs_path$redis_semantic_cache_embedding_modelredis_semantic_cache_index_nameredis_flush_sizeredis_startup_nodesdisk_cache_dirqdrant_api_baseqdrant_api_keyqdrant_collection_nameqdrant_quantization_config%qdrant_semantic_cache_embedding_model!qdrant_semantic_cache_vector_sizegcp_service_accountgcp_ssl_ca_certsc)           ,      K   sb  |t jkrM|std}*|*durt|*trt|*}|r?|||||d|)}+|'dur.|'|+d< |(dur6|(|+d< tdi |+| _	nt
d||||d|)| _	nq|t jkrbtd||||
||d|)| _	n\|t jkrtt|!|"|#|
|$|%|&d| _	nJ|t jkr~t | _	n@|t jkrtd|||||||||||d	|)| _	n&|t jkrt|||d
| _	n|t jkrt||d| _	n|t jkrt| d| _	dtjvrtjd dtjvrtjd dtjvrtjd || _ || _!|| _"|| _#|| _$|pt%j&| _'| j!t jkr|dur|| _$| j!t jks| j!t jkr|	dur|	| _$| j"dur-t| j	t
r/| j"| j	_"dS dS dS )a&  
        Initializes the cache based on the given type.

        Args:
            type (str, optional): The type of cache to initialize. Can be "local", "redis", "redis-semantic", "qdrant-semantic", "s3" or "disk". Defaults to "local".

            # Redis Cache Args
            host (str, optional): The host address for the Redis cache. Required if type is "redis".
            port (int, optional): The port number for the Redis cache. Required if type is "redis".
            password (str, optional): The password for the Redis cache. Required if type is "redis".
            namespace (str, optional): The namespace for the Redis cache. Required if type is "redis".
            ttl (float, optional): The ttl for the Redis cache
            redis_flush_size (int, optional): The number of keys to flush at a time. Defaults to 1000. Only used if batch redis set caching is used.
            redis_startup_nodes (list, optional): The list of startup nodes for the Redis cache. Defaults to None.

            # Qdrant Cache Args
            qdrant_api_base (str, optional): The url for your qdrant cluster. Required if type is "qdrant-semantic".
            qdrant_api_key (str, optional): The api_key for the local or cloud qdrant cluster.
            qdrant_collection_name (str, optional): The name for your qdrant collection. Required if type is "qdrant-semantic".
            similarity_threshold (float, optional): The similarity threshold for semantic-caching, Required if type is "redis-semantic" or "qdrant-semantic".

            # Disk Cache Args
            disk_cache_dir (str, optional): The directory for the disk cache. Defaults to None.

            # S3 Cache Args
            s3_bucket_name (str, optional): The bucket name for the s3 cache. Defaults to None.
            s3_region_name (str, optional): The region name for the s3 cache. Defaults to None.
            s3_api_version (str, optional): The api version for the s3 cache. Defaults to None.
            s3_use_ssl (bool, optional): The use ssl for the s3 cache. Defaults to True.
            s3_verify (bool, optional): The verify for the s3 cache. Defaults to None.
            s3_endpoint_url (str, optional): The endpoint url for the s3 cache. Defaults to None.
            s3_aws_access_key_id (str, optional): The aws access key id for the s3 cache. Defaults to None.
            s3_aws_secret_access_key (str, optional): The aws secret access key for the s3 cache. Defaults to None.
            s3_aws_session_token (str, optional): The aws session token for the s3 cache. Defaults to None.
            s3_config (dict, optional): The config for the s3 cache. Defaults to None.

            # GCS Cache Args
            gcs_bucket_name (str, optional): The bucket name for the gcs cache. Defaults to None.
            gcs_path_service_account (str, optional): Path to the service account json.
            gcs_path (str, optional): Folder path inside the bucket to store cache files.

            # Common Cache Args
            supported_call_types (list, optional): List of call types to cache for. Defaults to cache == on for all call types.
            **kwargs: Additional keyword arguments for redis.Redis() cache

        Raises:
            ValueError: If an invalid cache type is provided.

        Returns:
            None. Cache is set as a litellm param
        ZREDIS_CLUSTER_NODESN)r1   r2   r3   rL   Zstartup_nodesrU   rV   )r1   r2   r3   rL   )r1   r2   r3   r8   embedding_modelZ
index_name)rO   rP   Zcollection_namer8   Zquantization_configrW   Zvector_size)r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   )Zbucket_nameZpath_service_accountrI   )Zaccount_url	container)rN   cacher!   )(LiteLLMCacheTypeZREDISr   Z
get_secret
isinstancestrjsonloadsr   rY   r   ZREDIS_SEMANTICr   ZQDRANT_SEMANTICr   LOCALr   ZS3r   ZGCSr   Z
AZURE_BLOBr   ZDISKr   input_callbackappendsuccess_callbacklogging_callback_manageradd_litellm_success_callback_async_success_callback"add_litellm_async_success_callbackr9   r/   r4   rL   r5   r$   r%   r0   ),selfr/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   kwargsZ_env_cluster_nodesZcluster_kwargsr!   r!   r"   __init__8   s   
p






	

	











zCache.__init__returnc           	      K   s   d}| j di |}|durtd| |S t }t}|D ]?}||v r<| ||}|dur;|t| dt| 7 }q||vr]tj	du r]|| du rLq|| }|t| dt| 7 }qtd| t
|}| j|fi |}| jdd|i| |S )	a
  
        Get the cache key for the given arguments.

        Args:
            **kwargs: kwargs to litellm.completion() or embedding()

        Returns:
            str: The cache key generated from the arguments, or None if no cache key could be generated.
         Nz
Returning preset cache key: %sz: Tz
Created cache key: %spreset_cache_keyr!   )!_get_preset_cache_key_from_kwargsr
   r   r   Z_get_all_llm_api_paramsr   _get_param_valuer\   r   Z3enable_caching_on_provider_specific_optional_paramsr*   _get_hashed_cache_key_add_namespace_to_cache_key_set_preset_cache_key_in_kwargs)	rg   rh   	cache_keyrl   Zcombined_kwargsZlitellm_param_kwargsparamZparam_valueZhashed_cache_keyr!   r!   r"   get_cache_key  s:   


zCache.get_cache_keyrs   rh   c                 C   s,   |dkr	|  |S |dkr| |S || S )z?
        Get the value for the given param from kwargs
        modelfile)_get_model_param_value_get_file_param_value)rg   rs   rh   r!   r!   r"   rn   @  s
   

zCache._get_param_valuec                 C   s`   | di pi }| di pi }| di pi }| dp!| d}| ||}|p/|p/|d S )a  
        Handles getting the value for the 'model' param from kwargs

        1. If caching groups are set, then return the caching group as the model https://docs.litellm.ai/docs/routing#caching-across-model-groups
        2. Else if a model_group is set, then return the model_group as the model. This is used for all requests sent through the litellm.Router()
        3. Else use the `model` passed in kwargs
        metadatalitellm_paramsmodel_groupru   )get_get_caching_group)rg   rh   ry   rz   Zmetadata_in_litellm_paramsr{   Zcaching_groupr!   r!   r"   rw   N  s   zCache._get_model_param_valuery   r{   c                 C   s2   | dg }|r|D ]}||v rt|  S q
d S )Ncaching_groups)r|   r\   )rg   ry   r{   r~   groupr!   r!   r"   r}   _  s   zCache._get_caching_groupc                 C   sL   | d}| di }| di }| dp%t|ddp%| dp%| dS )zo
        Handles getting the value for the 'file' param from kwargs. Used for `transcription` requests
        rv   ry   rz   Zfile_checksumnameN	file_name)r|   getattr)rg   rh   rv   ry   rz   r!   r!   r"   rx   i  s   


zCache._get_file_param_valuec                 K   s    |rd|v r|d  ddS dS )a  
        Get the preset cache key from kwargs["litellm_params"]

        We use _get_preset_cache_keys for two reasons

        1. optional params like max_tokens, get transformed for bedrock -> max_new_tokens
        2. avoid doing duplicate / repeated work
        rz   rl   N)r|   )rg   rh   r!   r!   r"   rm   w  s   	z'Cache._get_preset_cache_key_from_kwargsrl   c                 K   s$   |rd|v r||d d< dS dS dS )z
        Set the calculated cache key in kwargs

        This is used to avoid doing duplicate / repeated work

        Placed in kwargs["litellm_params"]
        rz   rl   Nr!   )rg   rl   rh   r!   r!   r"   rq     s
   z%Cache._set_preset_cache_key_in_kwargsrr   c                 C   s&   t |  }| }td| |S )z
        Get the hashed cache key for the given cache key.

        Use hashlib to create a sha256 hash of the cache key

        Args:
            cache_key (str): The cache key to hash.

        Returns:
            str: The hashed cache key.
        zHashed cache key (SHA-256): %s)hashlibsha256encode	hexdigestr
   r   )rr   Zhash_objecthash_hexr!   r!   r"   ro     s   zCache._get_hashed_cache_keyr   c                 K   sP   | di }| dp| di  dp| j}|r | d| }td| |S )a  
        If a redis namespace is provided, add it to the cache key

        Args:
            hash_hex (str): The hashed cache key.
            **kwargs: Additional keyword arguments.

        Returns:
            str: The final hashed cache key with the redis namespace.
        rY   r4   ry   Zredis_namespace:zFinal hashed key: %s)r|   r4   r
   r   )rg   r   rh   Zdynamic_cache_controlr4   r!   r!   r"   rp     s   
z!Cache._add_namespace_to_cache_keyc                 c   sL    d}t dt||D ]}ddd||||  digiV  tt qd S )N   r   choicesdeltaZ	assistant)Zrolecontent)rangelentimesleepr   )rg   r   
chunk_sizeir!   r!   r"   generate_streaming_content  s   
z Cache.generate_streaming_contentcached_resultmax_agec                 C   s   |durHt |trHd|v rH|d }t }|| }|dur#||kr#dS |d}zt |tr1W |S t|}W |S  tyG   t|}Y |S w |S )zL
        Common get cache logic across sync + async implementations
        N	timestampresponse)	r[   dictr   r|   r]   r^   r    astliteral_eval)rg   r   r   r   current_timeZresponse_agecached_responser!   r!   r"   _get_cache_logic  s.   


zCache._get_cache_logicdynamic_cache_objectc                 K   s   z[| j di |durW dS |dg }d|v r|d }n| jdi |}|durY|di }|dp<|dp<td}|durI|j||d	}n| jj||d	}| j||d
W S W dS  tyn   tdt	
   Y dS w )a&  
        Retrieves the cached result for the given arguments.

        Args:
            *args: args to litellm.completion() or embedding()
            **kwargs: kwargs to litellm.completion() or embedding()

        Returns:
            The cached result if it exists, otherwise None.
        TNmessagesrr   rY   s-maxage	s-max-ageinf)r   r   r   An exception occurred: r!   )should_use_cacher|   rt   float	get_cacherY   r   r    r#   	traceback
format_exc)rg   r   rh   r   rr   cache_control_argsr   r   r!   r!   r"   r     s6   

zCache.get_cachec                    s   zc| j di |durW dS |dg  d|v r|d }n| jdi |}|durb|di }|d|dtd}|durM|j|fi |I dH }n| jj|fi |I dH }| j||d	W S W dS  tyw   td
t	
   Y dS w )zd
        Async get cache implementation.

        Used for embedding calls in async wrapper
        TNr   rr   rY   r   r   r   r   r   r!   )r   r|   rt   r   async_get_cacherY   r   r    r#   r   r   )rg   r   rh   rr   r   r   r   r!   r!   r"   r     s@   	
zCache.async_get_cachec           	   
   K   s   zUd|v r
|d }n| j di |}|durRt|tr| }| jdur)| j|d< |dd}t|trE| D ]\}}|dkrD||d< q8t |d}|||fW S t	d t	yc } z|d}~ww )zO
        Common implementation across sync + async add_cache functions
        rr   Nr5   rY   )r   r   zcache key is Noner!   )
rt   r[   r	   Zmodel_dump_jsonr5   r|   r   itemsr   r    )	rg   resultrh   rr   Z_cache_kwargskvcached_dataer!   r!   r"   _add_cache_logic=  s,   




zCache._add_cache_logicc              
   K   s   z(| j di |durW dS | jdd|i|\}}}| jj||fi | W dS  tyE } ztdt|  W Y d}~dS d}~ww )z
        Adds a result to the cache.

        Args:
            *args: args to litellm.completion() or embedding()
            **kwargs: kwargs to litellm.completion() or embedding()

        Returns:
            None
        TNr   #LiteLLM Cache: Excepton add_cache: r!   )r   r   rY   Z	set_cacher    r
   	exceptionr\   )rg   r   rh   rr   r   r   r!   r!   r"   	add_cache[  s   "zCache.add_cachec              
      s   zX| j di |durW dS | jdkr(| jdur(| j|fi |I dH  W dS | jdd|i|\}}}|durI|j||fi |I dH  W dS | jj||fi |I dH  W dS  tyv } zt	dt
|  W Y d}~dS d}~ww )z3
        Async implementation of add_cache
        TNZredisr   r   r!   )r   r/   rL   batch_cache_writer   Zasync_set_cacherY   r    r
   r   r\   )rg   r   r   rh   rr   r   r   r!   r!   r"   async_add_cachep  s,   ""zCache.async_add_cacheembedding_responseru   c              
   C   s   zEt |tr|d|d|d|dW S t|dr1| }|d|d|d|dW S t|}|d|d|d|dW S  tyX } ztd| d}~ww )zh
        Convert any embedding response into the standardized CachedEmbedding TypedDict format.
        r-   indexobject)r-   r   r   ru   
model_dumpz,Missing expected key in embedding response: N)r[   r   r|   hasattrr   varsKeyError
ValueError)rg   r   ru   datar   r!   r!   r"   _convert_to_cached_embedding  s0   

z"Cache._convert_to_cached_embeddingr   r   inputidx_in_result_datac                 C   sd   | j di i |d|i}||d< |j| }|j}| ||}| jdd|i|\}	}
}|	|
|fS )Nr   rr   r   r!   )rt   r   ru   r   r   )rg   r   r   rh   r   rl   r   Z
model_nameZembedding_dictrr   r   r!   r!   r"   add_embedding_response_to_cache  s   

z%Cache.add_embedding_response_to_cachec           
   
      s8  z}| j di |durW dS | jdur| j|d< g }t|d tr@t|d D ]\}}| ||||\}}}|||f q(nt|d trZ| ||d |\}}}|||f |durn|jdd|i|I dH  W dS | j	jdd|i|I dH  W dS  t
y }	 ztdt|	  W Y d}	~	dS d}	~	ww )z
        Async implementation of add_cache for Embedding calls

        Does a bulk write, to prevent using too many clients
        TNr5   r   
cache_listr   r!   )r   r5   r[   list	enumerater   ra   r\   Zasync_set_cache_pipelinerY   r    r
   r   )
rg   r   r   rh   r   idxr   rr   r   r   r!   r!   r"   async_add_cache_pipeline  sH   




"zCache.async_add_cache_pipelinec                 K   sP   | j tjkrdS |dd}td|| |r&t|tr&|dddu r&dS dS )z
        Returns true if we should use the cache for LLM API calls

        If cache is default_on then this is True
        If cache is default_off then this is only true when user has opted in to use cache
        TrY   Nz(should_use_cache: kwargs: %s; _cache: %sz	use-cacheF)r0   r$   r%   r|   r
   r   r[   r   )rg   rh   _cacher!   r!   r"   r     s   zCache.should_use_cachec                    s<   | j dd|i|\}}}| jj||fi |I d H  d S )Nr   r!   )r   rY   r   )rg   r   rh   rr   r   r!   r!   r"   r     s    zCache.batch_cache_writec                    s"   t | jd}|r| I d H S d S )Npingr   rY   )rg   Z
cache_pingr!   r!   r"   r     s
   z
Cache.pingc                    s$   t | jd}|r||I d H S d S )Ndelete_cache_keysr   )rg   keysZcache_delete_cache_keysr!   r!   r"   r     s
   zCache.delete_cache_keysc                    s&   t | jdr| j I d H  d S d S )N
disconnect)r   rY   r   rg   r!   r!   r"   r     s   zCache.disconnectc                 C   s   dS )z
        Internal method to check if the cache type supports async get/set operations

        All cache types now support async operations

        Tr!   r   r!   r!   r"   _supports_async  s   zCache._supports_asyncr   )r   )0r'   r(   r)   rZ   r_   r$   r%   r   r\   r   r   CachingSupportedCallTypesboolr   r   intri   rt   r   rn   rw   r}   rx   rm   rq   staticmethodro   rp   r   r   r   r   r   r   r   r   ZCachedEmbeddingr   r   r   r   r   r   r   r   r   r   r   r!   r!   r!   r"   r*   7   s   	

 !"#$%&'()*+,-./012345679:
 ],



'(
&

&

*r*   r+   r/   r1   r2   r3   r9   c                 K   s   t d dtjvrtjd dtjvrtjd dtjvr%tjd tj	du r8t
d| ||||d|t_	t dtj	  t dttj	  dS )a  
    Enable cache with the specified configuration.

    Args:
        type (Optional[Literal["local", "redis", "s3", "disk"]]): The type of cache to enable. Defaults to "local".
        host (Optional[str]): The host address of the cache server. Defaults to None.
        port (Optional[str]): The port number of the cache server. Defaults to None.
        password (Optional[str]): The password for the cache server. Defaults to None.
        supported_call_types (Optional[List[Literal["completion", "acompletion", "embedding", "aembedding"]]]):
            The supported call types for the cache. Defaults to ["completion", "acompletion", "embedding", "aembedding"].
        **kwargs: Additional keyword arguments.

    Returns:
        None

    Raises:
        None
    zLiteLLM: Enabling CacherY   Nr/   r1   r2   r3   r9   z&LiteLLM: Cache enabled, litellm.cache=LiteLLM Cache: r!   )r#   r   r`   ra   rb   rc   rd   re   rf   rY   r*   r   r/   r1   r2   r3   r9   rh   r!   r!   r"   enable_cache  s&   '



r   c                 K   sL   t d td| ||||d|t_t dtj  t dttj  dS )a  
    Update the cache for LiteLLM.

    Args:
        type (Optional[Literal["local", "redis", "s3", "disk"]]): The type of cache. Defaults to "local".
        host (Optional[str]): The host of the cache. Defaults to None.
        port (Optional[str]): The port of the cache. Defaults to None.
        password (Optional[str]): The password for the cache. Defaults to None.
        supported_call_types (Optional[List[Literal["completion", "acompletion", "embedding", "aembedding"]]]):
            The supported call types for the cache. Defaults to ["completion", "acompletion", "embedding", "aembedding"].
        **kwargs: Additional keyword arguments for the cache.

    Returns:
        None

    zLiteLLM: Updating Cacher   z&LiteLLM: Cache Updated, litellm.cache=r   Nr!   )r#   r*   r   rY   r   r   r!   r!   r"   update_cacheW  s   %r   c                  C   sz   ddl m}  td | t tjd tjd tjd W d   n1 s+w   Y  dt_	tdtj	  dS )aF  
    Disable the cache used by LiteLLM.

    This function disables the cache used by the LiteLLM module. It removes the cache-related callbacks from the input_callback, success_callback, and _async_success_callback lists. It also sets the litellm.cache attribute to None.

    Parameters:
    None

    Returns:
    None
    r   suppresszLiteLLM: Disabling CacherY   Nz'LiteLLM: Cache disabled, litellm.cache=)

contextlibr   r#   r   r   r`   removerb   re   rY   r   r!   r!   r"   disable_cache  s   
r   );r   r   r]   r   r   enumr   typingr   r   r   r   r   r   Zpydanticr	   r   Zlitellm._loggingr
   Zlitellm.constantsr   Z-litellm.litellm_core_utils.model_param_helperr   Zlitellm.types.cachingZlitellm.types.utilsr   r   Zazure_blob_cacher   Z
base_cacher   Z
disk_cacher   Z
dual_cacher   Z	gcs_cacher   Zin_memory_cacher   Zqdrant_semantic_cacher   redis_cacher   Zredis_cluster_cacher   Zredis_semantic_cacher   Zs3_cacher   r#   r\   r$   r*   rZ   r_   r   r   r   r   r!   r!   r!   r"   <module>   s   	 	     j

=

2