o
    ưi                     @   s  d Z ddlZddlmZmZmZ ddlmZmZmZm	Z	m
Z
mZ ddlZddlmZ ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZm Z m!Z! ddl"m#Z# ddl"m#Z$ ddl"m%Z%m&Z& dZ'G dd dZ(G dd deZ)dS )a  
Provider budget limiting

Use this if you want to set $ budget limits for each provider.

Note: This is a filter, like tag-routing. Meaning it will accept healthy deployments and then filter out deployments that have exceeded their budget limit.

This means you can use this with weighted-pick, lowest-latency, simple-shuffle, routing etc

Example:
```
openai:
	budget_limit: 0.000000000001
	time_period: 1d
anthropic:
	budget_limit: 100
	time_period: 7d
```
    N)datetime	timedeltatimezone)AnyDictListOptionalTupleUnion)verbose_router_logger)	DualCache)RedisPipelineIncrementOperation)CustomLoggerSpan)duration_in_seconds)_get_tags_from_request_kwargs)%_get_prometheus_logger_from_callbacks)AllMessageValues)DeploymentTypedDictLiteLLM_ParamsRouterErrors)BudgetConfig)GenericBudgetConfigTypeStandardLoggingPayload   c                   @   s   e Zd ZdZdZdeeef fddZdedefdd	Z	dedefd
dZ
dedefddZd dededefddZdd Zdd Zdd Zdd ZdefddZdeeef fddZdeeef fddZdS )!_LiteLLMParamsDictViewz
    Lightweight attribute view over `litellm_params` dict.

    This avoids pydantic construction in request hot-path while preserving
    attribute-style access used by `litellm.get_llm_provider(...)`.
    _paramsparamsc                 C   s
   || _ d S Nr   )selfr    r!   ]/home/app/Keep/.python/lib/python3.10/site-packages/litellm/router_strategy/budget_limiter.py__init__6      
z_LiteLLMParamsDictView.__init__keyreturnc                 C      | j |S r   r   getr    r%   r!   r!   r"   __getattr__9      z"_LiteLLMParamsDictView.__getattr__c                 C   r'   r   r(   r*   r!   r!   r"   __getitem__<   r,   z"_LiteLLMParamsDictView.__getitem__c                 C   s
   || j v S r   r   r*   r!   r!   r"   __contains__?   r$   z#_LiteLLMParamsDictView.__contains__Ndefaultc                 C   s   | j ||S r   r(   )r    r%   r/   r!   r!   r"   r)   B   s   z_LiteLLMParamsDictView.getc                 C   
   | j  S r   )r   keysr    r!   r!   r"   r1   E   r$   z_LiteLLMParamsDictView.keysc                 C   r0   r   )r   valuesr2   r!   r!   r"   r3   H   r$   z_LiteLLMParamsDictView.valuesc                 C   r0   r   )r   itemsr2   r!   r!   r"   r4   K   r$   z_LiteLLMParamsDictView.itemsc                 C   
   t | jS r   )iterr   r2   r!   r!   r"   __iter__N   r$   z_LiteLLMParamsDictView.__iter__c                 C   r5   r   )lenr   r2   r!   r!   r"   __len__Q   r$   z_LiteLLMParamsDictView.__len__c                 C   r5   r   dictr   r2   r!   r!   r"   r;   T   r$   z_LiteLLMParamsDictView.dictc                 C   r5   r   r:   r2   r!   r!   r"   
model_dumpW   r$   z!_LiteLLMParamsDictView.model_dumpr   )__name__
__module____qualname____doc__	__slots__r   strr   r#   r+   r-   boolr.   r)   r1   r3   r4   r7   intr9   r;   r<   r!   r!   r!   r"   r   ,   s    r   c                   @   s  e Zd Z	dNdedee deeee ee	e
ef  f  fddZ		dOde
ded	eee  d
ee dee dee fddZdee	e
ef  dee	e
ef  de	e
ef de	e
ef deee
  de	e
ef dee
 deee	e
ef  e
f fddZ	dNdee	e
ef  d
ee	 deee
 e	e
ef e	e
ef eee
  f fddZde
dededefddZde
de
dedededefd d!Zde
ded"efd#d$Zd%d& Zd'ede
de
defd(d)Zd*d+ Zd,d- Zd.d/ Zd0e
dee fd1d2Zd3e
dee fd4d5Zd6e
dee fd7d8Z d9e	dee
 fd:d;Z!d3e
d<ed=efd>d?Z"d3e
dee fd@dAZ#d3e
dee
 fdBdCZ$d3e
d'efdDdEZ%e&	dNdee deeee ee	e
ef  f  fdFdGZ'dHdI Z(	dNdeeee ee	e
ef  f  fdJdKZ)dLdM Z*dS )PRouterBudgetLimitingN
dual_cacheprovider_budget_config
model_listc                 C   sh   || _ g | _t|   || _d | _d | _|   | j	|d | 
  ttjtr2tj|  d S d S )N)rH   )rF   redis_increment_operation_queueasynciocreate_task(periodic_sync_in_memory_spend_with_redisrG   deployment_budget_configtag_budget_config_init_provider_budgets_init_deployment_budgets_init_tag_budgets
isinstancelitellm	callbackslistZlogging_callback_managerZadd_litellm_callback)r    rF   rG   rH   r!   r!   r"   r#   \   s   zRouterBudgetLimiting.__init__modelhealthy_deploymentsmessagesrequest_kwargsparent_otel_spanr&   c              
      s   t |tr	|g}t|dkr|S g }| j||dI dH \}}}	}
t|dkrt| jj||dI dH }|p:dgt| }i }t|D ]\}}t|| pKd||< qA| j|||	|
||t	|dd\}}t|dkrrt
tjj d| |S |S )	z
        Filter out deployments that have exceeded their provider budget limit.


        Example:
        if deployment = openai/gpt-3.5-turbo
            and openai spend > openai budget limit
                then skip this deployment
        r   )rW   rY   N)r1   rZ           rY   )rW   provider_configsdeployment_configsdeployment_providers	spend_mappotential_deploymentsrequest_tags: )rR   r;   r8   0_async_get_cache_keys_for_router_budget_limitingrF   async_batch_get_cache	enumeratefloat$_filter_out_deployments_above_budgetr   
ValueErrorr   Z+no_deployments_with_provider_budget_routingvalue)r    rV   rW   rX   rY   rZ   ra   
cache_keysr]   r^   r_   Z_current_spendsZcurrent_spendsr`   idxr%   deployment_above_budget_infor!   r!   r"   async_filter_deploymentst   sX   

z-RouterBudgetLimiting.async_filter_deploymentsra   r]   r^   r_   r`   rb   c                 C   s$  d}t |D ]\}	}
d}| jrb|	t|k r||	 }n| |
}||v rb|| }|jdu r.q|d| d|j d}| j|||jd |jrb||jkrbd| d	| d
|j }|| d7 }d}q| jr|r|
d}|
dpri }|d}|
di d}||v r|| }|d| d|j d}|jr||jkrd| d| d| d	| d
|j 
}t	
| || d7 }d}q| jr|r|D ]<}| |}|r|d| d|j d}|jr||jkrd| d| d|j }t	
| || d7 }d}qq|r||
 q||fS )a  
        Filter out deployments that have exceeded their budget limit.
        Follow budget checks are run here:
            - Provider budget
            - Deployment budget
            - Request tags budget
        Returns:
            Tuple[List[Dict[str, Any]], str]:
                - A tuple containing the filtered deployments
                - A string containing debug information about deployments that exceeded their budget limit.
         TNprovider_spend::r[   providerspendbudget_limitzExceeded budget for provider rc   z >= 
FZ
model_namelitellm_paramsrV   
model_infoiddeployment_spend:z+Exceeded budget for deployment model_name: z, litellm_params.model: , model_id: 
tag_spend:zExceeded budget for tag='z', tag_spend=z, tag_budget_limit=)rf   rG   r8    _get_llm_provider_for_deployment
max_budgetr)   budget_duration+_track_provider_remaining_budget_prometheusrM   r   debugrN   _get_budget_config_for_tagappend)r    ra   rW   r]   r^   r_   r`   rb   rm   rl   
deploymentZis_within_budgetrs   configcurrent_spendZ	debug_msgZ_model_name_litellm_paramsZ_litellm_model_namemodel_id_tag_tag_budget_configZ
_tag_spendr!   r!   r"   rh      sz   





$



z9RouterBudgetLimiting._filter_out_deployments_above_budgetc                    s  g }i }i }g }|D ]z}| j r<| |}|| |dur<| |}	|	dur<|	jdur<|	||< |d| d|	j  | jre|di d}
|
dure| |
}	|	dure|	||
< |d|
 d|	j  | jrt	|d}|D ]}| 
|}|r|d| d|j  qoq||||fS )	aB  
        Returns list of cache keys to fetch from router cache for budget limiting and provider and deployment configs

        Returns:
            Tuple[List[str], Dict[str, GenericBudgetInfo], Dict[str, GenericBudgetInfo], List[Optional[str]]]:
                - List of cache keys to fetch from router cache for budget limiting
                - Dict of provider budget configs `provider_configs`
                - Dict of deployment budget configs `deployment_configs`
                - List of resolved providers aligned by deployment index `deployment_providers`
        Nrp   rq   rx   ry   rz   r\   r|   )rG   r}   r   _get_budget_config_for_providerr   rM   r)   !_get_budget_config_for_deploymentrN   r   r   )r    rW   rY   rk   r]   r^   r_   r   rs   budget_configr   rb   r   r   r!   r!   r"   rd     sT   





zERouterBudgetLimiting._async_get_cache_keys_for_router_budget_limitingstart_time_keycurrent_timettl_secondsc                    s@   | j |I dH }|du r| j j|||dI dH  |S t|S )z
        Checks if the key = `provider_budget_start_time:{provider}` exists in cache.

        If it does, return the value.
        If it does not, set the key to `current_time` and return the value.
        Nr%   rj   ttl)rF   async_get_cacheasync_set_cacherg   )r    r   r   r   budget_startr!   r!   r"   _get_or_set_budget_start_time_  s   	z2RouterBudgetLimiting._get_or_set_budget_start_time	spend_keyresponse_costc                    s6   | j j|||dI dH  | j j|||dI dH  |S )a  
        Handle start of new budget window by resetting spend and start time

        Enters this when:
        - The budget does not exist in cache, so we need to set it
        - The budget window has expired, so we need to reset everything

        Does 2 things:
        - stores key: `provider_spend:{provider}:1d`, value: response_cost
        - stores key: `provider_budget_start_time:{provider}`, value: current_time.
            This stores the start time of the new budget window
        r   N)rF   r   )r    r   r   r   r   r   r!   r!   r"   _handle_new_budget_windowp  s   z.RouterBudgetLimiting._handle_new_budget_windowr   c                    s:   | j jj|||dI dH  t|||d}| j| dS )a  
        Increment spend within existing budget window

        Runs once the budget start time exists in Redis Cache (on the 2nd and subsequent requests to the same provider)

        - Increments the spend in memory cache (so spend instantly updated in memory)
        - Queues the increment operation to Redis Pipeline (using batched pipeline to optimize performance. Using Redis for multi instance environment of LiteLLM)
        r   N)r%   Zincrement_valuer   )rF   in_memory_cacheZasync_incrementr   rI   r   )r    r   r   r   Zincrement_opr!   r!   r"   "_increment_spend_in_current_window  s   z7RouterBudgetLimiting._increment_spend_in_current_windowc                    sT  t d |dd}|du rtd|dd}t|dd}|d	i d
d}|du r4td| |}	|	rUd| d|	j }
d| }| j|	|
||dI dH  | |}|rvd| d|j }d| }| j||||dI dH  t	|}t
|dkr|D ]%}| |}|rd| d|j }d| }| j||||dI dH  qdS dS )z)Original method now uses helper functionsz/in RouterBudgetLimiting.async_log_success_eventZstandard_logging_objectNz$standard_logging_payload is requiredr   r   r   ro   rw   custom_llm_providerzcustom_llm_provider is requiredrp   rq   provider_budget_start_time:)r   r   r   r   rz   zdeployment_budget_start_time:r|   ztag_budget_start_time:)r   r   r)   ri   rB   r   r   _increment_spend_for_keyr   r   r8   r   )r    kwargsZresponse_obj
start_timeend_timeZstandard_logging_payloadr   r   r   r   r   r   rM   Zdeployment_spend_keyZdeployment_start_time_keyrb   r   r   Z_tag_spend_keyZ_tag_start_time_keyr!   r!   r"   async_log_success_event  sh   






z,RouterBudgetLimiting.async_log_success_eventr   c           
         s   |j d u rd S ttj }t|j }| j|||dI d H }|d u r2| j|||||dI d H }n.|| |krKt	
d | j|||||dI d H }n|||  }t|}	| j|||	dI d H  t	
d| d|  d S )N)r   r   r   )r   r   r   r   r   z,Budget window expired - resetting everything)r   r   r   zIncremented spend for z by )r   r   nowr   utc	timestampr   r   r   r   r   rD   r   )
r    r   r   r   r   r   r   r   Zremaining_timeZttl_for_incrementr!   r!   r"   r     sF   


	z-RouterBudgetLimiting._increment_spend_for_keyc              
      sr   	 z|   I dH  ttI dH  W n$ ty7 } ztdt|  ttI dH  W Y d}~nd}~ww q)z
        Handler that triggers sync_in_memory_spend_with_redis every DEFAULT_REDIS_SYNC_INTERVAL seconds

        Required for multi-instance environment usage of provider budgets
        TNzError in periodic sync task: ) _sync_in_memory_spend_with_redisrJ   sleepDEFAULT_REDIS_SYNC_INTERVAL	Exceptionr   errorrB   r    er!   r!   r"   rL     s   z=RouterBudgetLimiting.periodic_sync_in_memory_spend_with_redisc              
      s   z'| j js	W dS td| j t| jdkr#t| j jj| jd g | _W dS  t	yE } zt
dt|  W Y d}~dS d}~ww )a  
        How this works:
        - async_log_success_event collects all provider spend increments in `redis_increment_operation_queue`
        - This function pushes all increments to Redis in a batched pipeline to optimize performance

        Only runs if Redis is initialized
        Nz.Pushing Redis Increment Pipeline for queue: %sr   )Zincrement_list*Error syncing in-memory cache with Redis: )rF   redis_cacher   r   rI   r8   rJ   rK   Zasync_increment_pipeliner   r   rB   r   r!   r!   r"   #_push_in_memory_increments_to_redis#  s*   z8RouterBudgetLimiting._push_in_memory_increments_to_redisc           
   
      s  z| j jdu rW dS |  I dH  g }| jdur4| j D ]\}}|du r'q|d| d|j  q| jdurT| j D ]\}}|du rGq>|d| d|j  q>| jdurt| j D ]\}}|du rgq^|d| d|j  q^| j jj	|dI dH }t
|tr| D ]$\}}|dur| j jj|t|dI dH  td| d	|  qW dS W dS  ty }	 ztd
t|	  W Y d}	~	dS d}	~	ww )a  
        Ensures in-memory cache is updated with latest Redis values for all provider spends.

        Why Do we need this?
        - Optimization to hit sub 100ms latency. Performance was impacted when redis was used for read/write per request
        - Use provider budgets in multi-instance environment, we use Redis to sync spend across all instances

        What this does:
        1. Push all provider spend increments to Redis
        2. Fetch all current provider spend from Redis to update in-memory cache
        Nrp   rq   rz   r|   )Zkey_list)r%   rj   zUpdated in-memory cache for rc   r   )rF   r   r   rG   r4   r   r   rM   rN   re   rR   r;   r   r   rg   r   r   r   r   rB   )
r    rk   rs   r   r   tagZredis_valuesr%   rj   r   r!   r!   r"   r   A  s\   



	z5RouterBudgetLimiting._sync_in_memory_spend_with_redisr   c                 C      | j d u rd S | j |d S r   )rM   r)   )r    r   r!   r!   r"   r     s   
z6RouterBudgetLimiting._get_budget_config_for_deploymentrs   c                 C   r   r   )rG   r)   )r    rs   r!   r!   r"   r     s   
z4RouterBudgetLimiting._get_budget_config_for_providerr   c                 C   r   r   )rN   r)   )r    r   r!   r!   r"   r     s   
z/RouterBudgetLimiting._get_budget_config_for_tagr   c                 C   s   z;| dpi }t|tr|jpd}|}nt|tr&| dp d}t|}nd}ti }tjt||d\}}}}W |S  t	yM   t
d|  Y d S w )Nrw   ro   rV   )rV   rw   z+Error getting LLM provider for deployment: )r)   rR   r   rV   r;   r   rS   Zget_llm_providerrB   r   r   r   )r    r   Zdeployment_litellm_paramsrV   Zprovider_resolution_params_r   r!   r!   r"   r}     s.   


	z5RouterBudgetLimiting._get_llm_provider_for_deploymentrt   ru   c                 C   s"   t  }|r|j|||d dS dS )z
        Optional helper - emit provider remaining budget metric to Prometheus

        This is helpful for debugging and monitoring provider budget limits.
        rr   N)r   Ztrack_provider_remaining_budget)r    rs   rt   ru   Zprometheus_loggerr!   r!   r"   r     s   	
z@RouterBudgetLimiting._track_provider_remaining_budget_prometheusc                    sn   |  |}|du rdS d| d|j }| jjr$| jj|I dH }n	| j|I dH }|dur5t|S dS )ab  
        GET the current spend for a provider from cache

        used for GET /provider/budgets endpoint in spend_management_endpoints.py

        Args:
            provider (str): The provider to get spend for (e.g., "openai", "anthropic")

        Returns:
            Optional[float]: The current spend for the provider, or None if not found
        Nrp   rq   r[   )r   r   rF   r   r   rg   )r    rs   r   r   r   r!   r!   r"   _get_current_provider_spend  s   
z0RouterBudgetLimiting._get_current_provider_spendc                    s   |  |}|d u rd S d| d|j }| jjr$| jj|I d H }n	| j|I d H }|d u r3d S ttjt	|d 
 S )Nrp   rq   )seconds)r   r   rF   r   Zasync_get_ttlr   r   r   r   r   	isoformat)r    rs   r   r   r   r!   r!   r"   %_get_current_provider_budget_reset_at  s   
z:RouterBudgetLimiting._get_current_provider_budget_reset_atc                    s   d| d|j  }d| }d}|j durt|j }| j|I dH }|du r<ttj }| jj	|||dI dH  | j|I dH }|du rW| jj	|d|dI dH  dS dS )a!  
        Initialize provider budget in cache by storing the following keys if they don't exist:
        - provider_spend:{provider}:{budget_config.time_period} - stores the current spend
        - provider_budget_start_time:{provider} - stores the start time of the budget window

        rp   rq   r   Nr   r[   )
r   r   rF   r   r   r   r   r   r   r   )r    rs   r   r   r   r   r   Z
_spend_keyr!   r!   r"   _init_provider_budget_in_cache  s$   



z3RouterBudgetLimiting._init_provider_budget_in_cachec                 C   s^   | durdS t jdurdS |du rdS |D ]}|di }|ds)|ddur, dS qdS )a  
        Returns `True` if the router budget routing settings are set and RouterBudgetLimiting should be initialized

        Either:
         - provider_budget_config is set
         - budgets are set for deployments in the model_list
         - tag_budget_config is set
        NTFrw   r~   r   )rS   rN   r)   )rG   rH   _modelr   r!   r!   r"   !should_init_router_budget_limiter  s   
z6RouterBudgetLimiting.should_init_router_budget_limiterc                 C   s   | j d urJ| j  D ]4\}}|d u rtd| d| j  t|ts1t|d|dd| j |< t| j|| j | d q
t	
d| j   d S d S )Nz$No budget config found for provider z, provider_budget_config: ru   time_period)ru   r   )rs   r   z#Initalized Provider budget config: )rG   r4   ri   rR   GenericBudgetInfor)   rJ   rK   r   r   r   )r    rs   r   r!   r!   r"   rO   .  s*   


z+RouterBudgetLimiting._init_provider_budgetsc           	   	   C   s   |d u rd S |D ]K}| di }| dpi }| d}| d}| d}td| d| d|  |d urS|d urS|d urSt||d	}| jd u rNi | _|| j|< qtd
| j  d S )Nrw   rx   ry   r~   r   z$Init Deployment Budget: max_budget: z, budget_duration: r{   r   ru   z&Initialized Deployment Budget Config: )r)   r   r   r   rM   )	r    rH   r   r   Z_model_infoZ	_model_idZ_max_budgetZ_budget_durationZ_budget_configr!   r!   r"   rP   G  s2   





z-RouterBudgetLimiting._init_deployment_budgetsc                 C   s   t jd u rd S ddlm}m} |durtd|j | jd u r#i | _t j D ]\}}t|t	r8t
di |}t|j|jd}|| j|< q(td| j  d S )Nr   )CommonProxyErrorspremium_userTz,Tag budgets are an Enterprise only feature, r   zInitialized Tag Budget Config: r!   )rS   rN   Zlitellm.proxy.proxy_serverr   r   ri   Znot_premium_userr4   rR   r;   r   r   r   r~   r   r   )r    r   r   r   r   Z_generic_budget_configr!   r!   r"   rQ   j  s(   




z&RouterBudgetLimiting._init_tag_budgetsr   )NN)+r=   r>   r?   r   r   r;   r
   r   r   r   rB   r   r#   r   r   rn   r   rg   r	   rh   rd   rD   r   r   r   r   r   rL   r   r   r   r   r   r}   r   r   r   r   staticmethodr   rO   rP   rQ   r!   r!   r!   r"   rE   [   s$   


K



	
^



E


;
3C




 
#rE   )*r@   rJ   r   r   r   typingr   r   r   r   r	   r
   rS   Zlitellm._loggingr   Zlitellm.caching.cachingr   Zlitellm.caching.redis_cacher   Z"litellm.integrations.custom_loggerr   r   Z*litellm.litellm_core_utils.duration_parserr   Z)litellm.router_strategy.tag_based_routingr   Z'litellm.router_utils.cooldown_callbacksr   Zlitellm.types.llms.openair   Zlitellm.types.routerr   r   r   Zlitellm.types.utilsr   r   r   r   r   r   rE   r!   r!   r!   r"   <module>   s(     /