o
    ưi                     @   s"  d dl Z d dlZd dlmZmZ d dlmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlZd dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZ d dlm Z m!Z! er{d dl"m#Z$ d dl%m&Z' ee$ef Z#e'Z&neZ#eZ&G dd deZ(G dd deZ)dS )    N)datetime	timedelta)TYPE_CHECKINGAnyListLiteralOptionalTupleUnion)HTTPException)	BaseModel)	TypedDict)	DualCacheModelResponseverbose_proxy_logger)CustomLogger)!_get_parent_otel_span_from_kwargs)CommonProxyErrorsCurrentItemRateLimitUserAPIKeyAuth)get_key_model_rpm_limitget_key_model_tpm_limit)Span)InternalUsageCachec                   @   sV   e Zd ZU ee ed< ee ed< ee ed< ee ed< ee ed< ee ed< dS )CacheObjectcurrent_global_requestsrequest_count_api_keyrequest_count_api_key_modelrequest_count_user_idrequest_count_team_idrequest_count_end_user_idN)__name__
__module____qualname__r   dict__annotations__ r'   r'   c/home/app/Keep/.python/lib/python3.10/site-packages/litellm/proxy/hooks/parallel_request_limiter.pyr   !   s   
 r   c                       s@  e Zd ZdefddZdd Zdededed	e	d
e
de
de
dee de	ded deeeef  defddZdefddZ	d.dee	 defddZ	d.dee	 dee	 dee	 dee	 dee	 dee	 d ee defd!d"Zdededed	e	fd#d$Zd%d& Zd'd( Zd)e	dedee fd*d+Zdedef fd,d-Z  ZS )/!_PROXY_MaxParallelRequestsHandlerinternal_usage_cachec                 C   s
   || _ d S N)r*   )selfr*   r'   r'   r(   __init__,   s   
z*_PROXY_MaxParallelRequestsHandler.__init__c                 C   s:   zt | tjrt| W d S W d S  ty   Y d S w r+   )r   debuglitellmZset_verboseprint	Exception)r,   Zprint_statementr'   r'   r(   print_verbose/   s   
z/_PROXY_MaxParallelRequestsHandler.print_verboseuser_api_key_dictcachedata	call_typemax_parallel_requests	tpm_limit	rpm_limitcurrentr   rate_limit_type)keymodel_per_keyusercustomerteamvalues_to_update_in_cachereturnc                    sR  t d|
 d|  |d u r@|dks|dks|dkr2| jtjj d|
 d| d| d| 	dd	dd	d
}||	|f nYt|d |k rl|d |k rl|d |k rl|d d	 |d |d d	 d
}||	|f n-tdd|
 dtjj d|d  d| d|d  d| d|d  d| dt	| 
 id| jj|d|jddI d H  |S )NzCurrent Usage of z in this minute: r   z. Hit limit for z). Current limits: max_parallel_requests: z, tpm_limit: z, rpm_limit: additional_details   current_requestscurrent_tpmcurrent_rpmrG   rH   rI     z1LiteLLM Rate Limit Handler for rate limit type = z. z. current rpm: z, rpm limit: z, current tpm: z, tpm limit: z!, current max_parallel_requests: z, max_parallel_requests: retry-afterstatus_codedetailheaders<   T)
cache_listttllitellm_parent_otel_span
local_only)r   inforaise_rate_limit_errorr   "max_parallel_request_limit_reachedvalueappendintr   strtime_to_next_minuter*   async_batch_set_cacheparent_otel_span)r,   r3   r4   r5   r6   r7   r8   r9   r:   r   r;   rA   new_valr'   r'   r(   check_key_in_limits7   sD   "

Bz5_PROXY_MaxParallelRequestsHandler.check_key_in_limitsc                 C   s0   t  }|tdd jddd}||  }|S )NrE   )minutesr   )secondmicrosecond)r   nowr   replacetotal_seconds)r,   rd   Znext_minuteZseconds_to_next_minuter'   r'   r(   r\   p   s   z5_PROXY_MaxParallelRequestsHandler.time_to_next_minuteNrD   c                 C   s8   d}|dur|d | }t dd| dt|  id)zX
        Raise an HTTPException with a 429 status code and a retry-after header
        z"Max parallel request limit reachedN rJ   z#Max parallel request limit reached rK   rL   )r   r[   r\   )r,   rD   error_messager'   r'   r(   rV   |   s   z8_PROXY_MaxParallelRequestsHandler.raise_rate_limit_errorr   r   r   r    r!   r^   c           
         sp   ||||||g}| j j||dI d H }	|	d u r"td d d d d d dS t|	d |	d |	d |	d |	d |	d dS )	N)keysr^   )r   r   r   r   r    r!   r   rE               )r*   Zasync_batch_get_cacher   )
r,   r   r   r   r   r    r!   r^   ri   resultsr'   r'   r(   get_all_cache_objects   s:   	z7_PROXY_MaxParallelRequestsHandler.get_all_cache_objectsc           $         s  |  d |j}|j}|d u rtj}|d u ri }|di dd }t|dtj}|d u r1tj}t|dtj}	|	d u r?tj}	g }
d }|d urzd}| jj|d|j	dI d H }|d u r\d}||krl| j
d	| d
| dS | jj|dd|j	dI d H  |dd }t d}t d}t d}| d| d| }| j|d urdnd |d ur| d| dnd |d ur|d ur| d| d| dnd |jd ur|j d| dnd |jd ur|j d| dnd |jd ur|j d| dnd |j	dI d H }|d ur | d| d}| j||||||d |||	d|
dI d H  t|d us.t|d ur|dd }| d| d| d}t|}t|}d }d }|d ur`|rX||}|r`||}| j||||tj|d ||pqtj|pvtjd|
dI d H }d }d }|r|d ur||d  }|d ur||d  }d| |d| |i}d|vri |d< |d | |j}|d ur|j}|j}|d u rtj}|d u rtj}| d| d}| j||||tj|d |||d|
dI d H  |j}|d ur/|j} |j}!| d u r	tj} |!d u rtj}!| d| d}| j||||tj|d  || |!d!|
dI d H  |jrpt|d"tj}"t|d#tj}#|"d u rItj}"|#d u rQtj}#|j d| d}| j||||tj||d$ |"|#d%|
d&I d H  t| jj|
d'|j	d( d S ))Nz)Inside Max Parallel Request Pre-Call Hookmetadataglobal_max_parallel_requestsr8   r9   Tr<   rT   rS   rE   zHit Global Limit: Limit=z, current: rC   r<   rX   rT   rS   model%Y-%m-%d%H%M-::::request_count)r   r   r   r   r    r!   r^   r   r<   )r3   r4   r5   r6   r7   r:   r   r8   r9   r;   rA   r   r=   rH   rI   zlitellm-key-remaining-tokens-zlitellm-key-remaining-requests-r   r>   r    r@   end_user_tpm_limitend_user_rpm_limitr!   r?   )r3   r4   r5   r6   r7   r   r:   r8   r9   r;   rA   rP   rQ   rR   rS   )r2   api_keyr7   sysmaxsizegetgetattrr*   async_get_cacher^   rV   async_increment_cacher   rd   strftimero   user_idteam_idZend_user_idr`   r   r   updateuser_tpm_limituser_rpm_limitteam_tpm_limitteam_rpm_limitasynciocreate_taskr]   )$r,   r3   r4   r5   r6   r~   r7   rq   r8   r9   rA   r_   _keyr   _modelcurrent_datecurrent_hourcurrent_minuteprecise_minuteZcache_objectsr   Z_tpm_limit_for_key_modelZ_rpm_limit_for_key_modelZtpm_limit_for_modelZrpm_limit_for_modelZ_remaining_tokensZ_remaining_requestsZ_remaining_limits_datar   r   r   r   r   r   r{   r|   r'   r'   r(   async_pre_call_hook   s  
	



!

















z5_PROXY_MaxParallelRequestsHandler.async_pre_call_hookc              
      s  ddl m} t|d}z#| d |d d dd }|d d d }|d d d	d }	|d d d
d }
|d d dd }|d}|d d di pSi }|d urgd}| jj|dd|dI d H  t 	d}t 	d}t 	d}| d| d| }d}t
|tr|jj}g }|d ur| d| d}| jj||dI d H pdddd}t|d d d|d | |d d}| d| d|  |||f ||}|d ur2|d ur2d|v sd |v s|d ur2| d| d| d}| jj||dI d H pdddd}t|d d d|d | |d d}| d| d|  |||f |	d urd}t
|trC|jj}|	 d| d}| jj||dI d H p\d|dd}t|d d d|d | |d d}| d| d|  |||f |
d urd}t
|tr|jj}|
 d| d}| jj||dI d H pd|dd}t|d d d|d | |d d}| d| d|  |||f |d ur"d}t
|tr|jj}| d| d}| jj||dI d H pd|dd}t|d d d|d | |d d}| d| d|  |||f | jj|d!|d"I d H  W d S  tyI } z| | W Y d }~d S d }~ww )#Nr   )#get_model_group_from_litellm_kwargskwargsz5INSIDE parallel request limiter ASYNC SUCCESS LOGGINGlitellm_paramsrp   rq   user_api_keyuser_api_key_user_iduser_api_key_team_iduser_api_key_model_max_budgetr>   user_api_key_metadataTrs   ru   rv   rw   rx   ry   rz   r<   rS   rE   rF   rG   rH   rI   zupdated_value in success call: z, precise_minute: Zmodel_rpm_limitZmodel_tpm_limitrP   r}   )Z)litellm.proxy.common_utils.callback_utilsr   r   r2   r   r*   r   r   rd   r   
isinstancer   usagetotal_tokensr   maxrY   r]   r1   )r,   r   response_obj
start_timeend_timer   rS   rq   r   r   r   r   Zuser_api_key_end_user_idr   r   r   r   r   r   r   rA   r   r:   r_   Zmodel_grouper'   r'   r(   async_log_success_event  sF  

	
















z9_PROXY_MaxParallelRequestsHandler.async_log_success_eventc              
      s  z|  d t|d}|d di pi }|dd }|dd }|  d|  |d u r1W d S tjjt|d v r>W d S |d ur]d}	| jj|	d	|d
I d H  | jj	|	dd	|dI d H  t
 d}
t
 d}t
 d}|
 d| d| }| d| d}| jj||dI d H pdddd}t|d d d|d |d d}|  d|  | jj||d|dI d H  W d S  ty } ztdt| W Y d }~d S d }~ww )Nz(Inside Max Parallel Request Failure Hookr   r   rp   rq   r   zuser_api_key: 	exceptionTrr   r   rs   ru   rv   rw   rx   ry   rz   r   rE   r   rF   rG   rH   rI   zupdated_value in failure call: rP   )rR   rS   z;Inside Parallel Request Limiter: An exception occurred - {})r2   r   r   r   rW   rX   r[   r*   r   r   r   rd   r   r   Zasync_set_cacher1   r   r   format)r,   r   r   r   r   rS   	_metadatarq   r   r   r   r   r   r   r   r:   r_   r   r'   r'   r(   async_log_failure_event  s   

z9_PROXY_MaxParallelRequestsHandler.async_log_failure_eventr   c              
      s   ddl m} ddlm} ddlm} z|||| jjd|jddI dH }|du r+W dS |	 W S  t
yJ } z|dt| W Y d}~dS d}~ww )	a?  
        Helper to get the 'Internal User Object'

        It uses the `get_user_object` function from `litellm.proxy.auth.auth_checks`

        We need this because the UserApiKeyAuth object does not contain the rpm/tpm limits for a User AND there could be a perf impact by additionally reading the UserTable.
        r   r   )get_user_object)prisma_clientFN)r   r   Zuser_api_key_cacheZuser_id_upsertr^   Zproxy_logging_objz3Parallel Request Limiter: Error getting user object)litellm._loggingr   Zlitellm.proxy.auth.auth_checksr   Zlitellm.proxy.proxy_serverr   r*   Z
dual_cacher^   
model_dumpr1   r.   r[   )r,   r   r3   r   r   r   Z_user_id_rate_limitsr   r'   r'   r(   get_internal_user_object  s.   	
z:_PROXY_MaxParallelRequestsHandler.get_internal_user_objectc                    s  |j }t d}t d}t d}| d| d| }| d| d}	| jj|	|jdI dH }
d}d}d}d}|
dura|jdurR|j|
d	  }|j}|jdura|j|
d
  }|j}t	|drlt
|d}nd}|durt|ts|t|trt|tr| }|di pi }|dur||d< |dur||d< |dur||d< |dur||d< t|di |d|i t |||I dH S dS dS )z;
        Retrieve the key's remaining rate limits.
        ru   rv   rw   rx   ry   rz   r   NrI   rH   _hidden_paramsZadditional_headerszx-ratelimit-remaining-requestszx-ratelimit-limit-requestszx-ratelimit-remaining-tokenszx-ratelimit-limit-tokens)r~   r   rd   r   r*   r   r^   r9   r8   hasattrr   r   r   r%   r   r   setattrsuperasync_post_call_success_hook)r,   r5   r3   responser~   r   r   r   r   r   r:   Zkey_remaining_rpm_limitZkey_rpm_limitZkey_remaining_tpm_limitZkey_tpm_limitr   Z_additional_headers	__class__r'   r(   r   !  sv   





z>_PROXY_MaxParallelRequestsHandler.async_post_call_success_hookr+   )r"   r#   r$   r   r-   r2   r   r   r%   r[   rZ   r   r   r   r	   r   r`   floatr\   r   rV   r   r   ro   r   r   r   r   r   __classcell__r'   r'   r   r(   r)   *   s    	

9
	
*
   jT
$r)   )*r   r   r   r   typingr   r   r   r   r   r	   r
   Zfastapir   Zpydanticr   Ztyping_extensionsr   r/   r   r   r   r   Z"litellm.integrations.custom_loggerr   Z'litellm.litellm_core_utils.core_helpersr   Zlitellm.proxy._typesr   r   r   Zlitellm.proxy.auth.auth_utilsr   r   Zopentelemetry.tracer   _SpanZlitellm.proxy.utilsr   Z_InternalUsageCacher   r)   r'   r'   r'   r(   <module>   s.    $	