o
    ưi<                     @   s|   d Z ddlZddlZddlZddlmZmZ ddlZddlm	Z	 ddl
mZmZ ddlmZ ddlmZ G d	d
 d
eZdS )z
Qdrant Semantic Cache implementation

Has 4 methods:
    - set_cache
    - get_cache
    - async_set_cache
    - async_get_cache
    N)Anycast)print_verbose)QDRANT_SCALAR_QUANTILEQDRANT_VECTOR_SIZE)EmbeddingResponse   )	BaseCachec                   @   sd   e Zd Z								dddZdefddZdd	 Zd
d Zdd Zdd Z	dd Z
dd ZdS )QdrantSemanticCacheNtext-embedding-ada-002c	                 C   s  dd l }	ddlm}
m}m} ddlm} |d u rtd|| _t	d| j  |d u r/td|| _
|| _|d ur;|nt| _i }|rPt|trP|drP||}|r`t|tr`|dr`||}|pk|	dpk|	d	}|pr|	d
}ddi}|r}||d< |d u rtd|| _|| _t	d| j  || _|
 | _||jd| _|d u rt	d | jj| j d| j d| jd}|jdkrtd|j | d d r| jj| j d| j | jd}| | _t	d| j  d S |d u s|dkrdddii}n|dkrddtddi}n|d krd d!dd"i}ntd#| jj| j d| j | jd$d%|d&| jd'}| d rZ| jj| j d| j | jd}| | _t	d(| j  d S td))*Nr   )_get_httpx_clientget_async_httpx_clienthttpxSpecialProvider)get_secret_strz-collection_name must be provided, passed Nonez0qdrant semantic-cache initializing COLLECTION - z2similarity_threshold must be provided, passed Nonezos.environ/Z
QDRANT_URLZQDRANT_API_BASEZQDRANT_API_KEYzContent-Typezapplication/jsonzapi-keyzQdrant url must be providedz'qdrant semantic-cache qdrant_api_base: )Zllm_providerzNQuantization config is not provided. Default binary quantization will be used./collections/z/exists)urlheaders   z1Error from qdrant checking if /collections exist resultexistsz.Collection already exists.
Collection details:binary
always_ramFZscalarZint8)typeZquantiler   productZx16)compressionr   zBQuantization config must be one of 'scalar', 'binary' or 'product'ZCosine)sizeZdistance)Zvectorsquantization_config)r   jsonr   z+New collection created.
Collection details:z#Error while creating new collection) osZ&litellm.llms.custom_httpx.http_handlerr   r   r   Zlitellm.secret_managers.mainr   	Exceptioncollection_namer   similarity_thresholdembedding_modelr   vector_size
isinstancestr
startswithgetenv
ValueErrorqdrant_api_baseqdrant_api_keyr   sync_clientZCachingasync_clientgetstatus_codetextr   collection_infor   put)selfr)   r*   r    r!   r   r"   Z	host_typer#   r   r   r   r   r   r   Zcollection_existsZcollection_detailsZquantization_paramsZnew_collection_status r3   \/home/app/Keep/.python/lib/python3.10/site-packages/litellm/caching/qdrant_semantic_cache.py__init__   s   










zQdrantSemanticCache.__init__cached_responsec                 C   s<   |d u r|S zt |}W |S  ty   t|}Y |S w N)r   loadsr   astliteral_eval)r2   r6   r3   r3   r4   _get_cache_logic   s   z$QdrantSemanticCache._get_cache_logicc                 K   s   t d|  ddlm} |d }d}|D ]}||d 7 }qtttj| j|dddd	}|d
 d d }	t|}t	|ts@J dt|
 |	||ddgi}
| jj| j d| j d| j|
d d S )Nz)qdrant semantic-cache set_cache, kwargs: r   uuidmessages contentTzno-storezno-cachemodelinputcachedata	embeddingpointsr/   responseidvectorpayloadr   /pointsr   r   r   )r   litellm._uuidr=   r   r   litellmrG   r"   r%   r$   uuid4r+   r1   r)   r    r   )r2   keyvaluekwargsr=   r>   promptmessageembedding_responserG   rF   r3   r3   r4   	set_cache   s@   

zQdrantSemanticCache.set_cachec              	   K   sP  t d|  |d }d}|D ]}||d 7 }qtttj| j|dddd}|d d	 d
 }|dddddiddd}| jj| j d| j	 d| j
|d}	|	 d }
|
d u rZd S t|
trgt|
d	krgd S |
d	 d }|
d	 d d }t d| j d| d| d|  || jkr|
d	 d d }t d| d| d|  | j|d S d S )!Nz.sync qdrant semantic-cache get_cache, kwargs: r>   r?   r@   TrA   rB   rF   r   rG   quantizationF      @ignoreZrescoreZoversamplingr   rM   paramslimitZwith_payloadr   /points/searchrP   r   scorerN   r/   &semantic cache: similarity threshold: , similarity: 
, prompt: , closest_cached_prompt: rJ   got a cache hit, similarity: , Current prompt: , cached_prompt: r6   )r   r   r   rR   rG   r"   r+   postr)   r    r   r   r$   listlenr!   r;   )r2   rT   rV   r>   rW   rX   rY   rG   rF   search_responseresults
similaritycached_promptcached_valuer3   r3   r4   	get_cache   s^   


zQdrantSemanticCache.get_cachec              
      sZ  ddl m} ddlm}m} td|  |d }d}|D ]}	||	d 7 }q|d ur2dd	 |D ng }
|d urd| j|
v rd|d
i dd}|j| j|ddd|d|d
i dd ddI d H }nt	j| j|ddddI d H }|d d d }t
|}t|t
sJ dt
| |||ddgi}| jj| j d| j d| j|dI d H  d S )Nr   r<   llm_model_list
llm_routerz/async qdrant semantic-cache set_cache, kwargs: r>   r?   r@   c                 S      g | ]}|d  qS Z
model_namer3   .0mr3   r3   r4   
<listcomp>)      z7QdrantSemanticCache.async_set_cache.<locals>.<listcomp>metadatauser_api_keyTrA   trace_idr   zsemantic-cache-embeddingr   rC   rD   rE   r   rB   rF   rG   rH   rI   rK   r   rO   rP   )rQ   r=   litellm.proxy.proxy_serverrv   rw   r   r"   r-   
aembeddingrR   r%   r$   rS   r,   r1   r)   r    r   )r2   rT   rU   rV   r=   rv   rw   r>   rW   rX   router_model_namesr   rY   rG   rF   r3   r3   r4   async_set_cache  s\   
z#QdrantSemanticCache.async_set_cachec              
      s  t d|  ddlm}m} |d }d}|D ]}||d 7 }q|d ur,dd |D ng }|d ur^| j|v r^|d	i d
d}	|j| j|ddd|	d|d	i dd ddI d H }
ntj| j|ddddI d H }
|
d d d }|dddddiddd}| jj	| j
 d| j d| j|dI d H }| d }|d u rd|d	i d< d S t|trt|dkrd|d	i d< d S |d d }|d d  d! }t d"| j d#| d$| d%|  ||d	i d< || jkr|d d  d& }t d'| d(| d)|  | j|d*S d S )+Nz/async qdrant semantic-cache get_cache, kwargs: r   ru   r>   r?   r@   c                 S   rx   ry   r3   rz   r3   r3   r4   r}   f  r~   z7QdrantSemanticCache.async_get_cache.<locals>.<listcomp>r   r   TrA   r   r   r   rB   rF   rG   r[   Fr\   r]   r   r_   r   rb   rP   r   g        zsemantic-similarityrc   rN   r/   rd   re   rf   rg   rJ   rh   ri   rj   rk   )r   r   rv   rw   r"   r-   r   rR   r,   rl   r)   r    r   r   
setdefaultr$   rm   rn   r!   r;   )r2   rT   rV   rv   rw   r>   rW   rX   r   r   rY   rG   rF   ro   rp   rq   rr   rs   r3   r3   r4   async_get_cache[  s   
z#QdrantSemanticCache.async_get_cachec                    s   | j S r7   )r0   )r2   r3   r3   r4   _collection_info  s   z$QdrantSemanticCache._collection_infoc                    sF   g }|D ]}| | j|d |d fi | qtj| I d H  d S )Nr   r   )appendr   asynciogather)r2   Z
cache_listrV   tasksvalr3   r3   r4   async_set_cache_pipeline  s
   $z,QdrantSemanticCache.async_set_cache_pipeline)NNNNNr   NN)__name__
__module____qualname__r5   r   r;   rZ   rt   r   r   r   r   r3   r3   r3   r4   r
      s$    
 -C@Zr
   )__doc__r9   r   r   typingr   r   rR   Zlitellm._loggingr   Zlitellm.constantsr   r   Zlitellm.types.utilsr   Z
base_cacher	   r
   r3   r3   r3   r4   <module>   s    
