
    mhٔ                    4   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZ d dlZd dlZd dlmZ d dlmZ d dlm Z  d dl!Z!d dl"Z!d dl#Z!d dl!m$Z$ d d	l%m&Z& d d
l'm(Z( d dl)m*Z*m+Z+m,Z,m-Z- d dl.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z> d dl?m@Z@ d dlAmBZB d dlCmDZD d dlEmFZF d dlGmHZH d dlImJZJ d dlKmLZL d dlMmNZN d dlOmPZPmQZQ d dlRmSZSmTZTmUZU d dlVmWZW d dlXmYZYmZZZ d d l[m\Z\ d d!l]m^Z^m_Z_m`Z`maZambZb d d"lcmdZdmeZemfZf d d#lgmhZi d d$ljmkZkmlZl d d%lmmnZn d d&lompZp d d'lqmrZrmsZs d d(ltmuZumvZv d d)lwmxZxmyZymzZzm{Z{ d d*l|m}Z}m~Z~mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d d+lmZ d d,lmZmZ d d-lmZ d d-lmZ d d.lmZmZmZ d d/lmZmZmZmZmZmZmZmZmZmZ d0d1lmZ erd d2lmZ d d3lmZmZ eeef         ZneZeZeZ G d4 d5ej                  Z G d6 d7          ZdS )8    Ndefaultdict)	lru_cache)TYPE_CHECKINGAnyAsyncGeneratorCallableDictListLiteralOptionalTupleUnioncast)AsyncOpenAI)	BaseModel)overload)get_secret_str)verbose_router_logger)uuid)	DualCacheInMemoryCache
RedisCacheRedisClusterCache)DEFAULT_MAX_LRU_CACHE_SIZE)CustomLogger)run_async_function)!_get_parent_otel_span_from_kwargs)coroutine_checker)CredentialAccessor)tracer)Logging)RouterBudgetLimiting)LeastBusyLoggingHandler)LowestCostLoggingHandler)LowestLatencyLoggingHandler)LowestTPMLoggingHandler)LowestTPMLoggingHandler_v2)simple_shuffle)get_deployments_for_tag) add_fallback_headers_to_responseadd_retry_headers_to_response)"_get_router_metadata_variable_namereplace_model_in_jsonlshould_replace_model_in_jsonl)InitalizeCachedClient)get_dynamic_litellm_paramsis_clientside_credential)CooldownCache)DEFAULT_COOLDOWN_TIME_SECONDS_async_get_cooldown_deployments/_async_get_cooldown_deployments_with_debug_info_get_cooldown_deployments_set_cooldown_deployments)#_check_non_standard_fallback_formatget_fallback_model_grouprun_async_fallback)!get_num_retries_from_retry_policy)#async_raise_no_deployment_exceptionsend_llm_exception_alert)PromptCachingDeploymentCheck)ResponsesApiDeploymentCheck)0increment_deployment_failures_for_current_minute1increment_deployment_successes_for_current_minute)FlowItem	Scheduler)AllMessageValues	FileTypesOpenAIFileObjectOpenAIFilesPurpose)#CONFIGURABLE_CLIENTSIDE_AUTH_PARAMSVALID_LITELLM_ENVIRONMENTSAlertingConfigAllowedFailsPolicyAssistantsTypedDictCredentialLiteLLMParamsCustomPricingLiteLLMParamsCustomRoutingStrategyBase
DeploymentDeploymentTypedDictLiteLLM_ParamsMockRouterTestingParamsModelGroupInfoOptionalPreCallChecksRetryPolicyRouterCacheEnumRouterGeneralSettingsRouterModelGroupAliasItemRouterRateLimitErrorRouterRateLimitErrorBasicRoutingStrategy)ServiceTypes)GenericBudgetConfigTypeLiteLLMBatch)	ModelInfo)ModelResponseStreamStandardLoggingPayloadUsage)
CustomStreamWrapperEmbeddingResponseModelResponseRulesfunction_setupget_llm_provider!get_non_default_completion_params
get_secretget_utc_datetimeis_region_allowed   )PatternMatchRouter)Span)
AutoRouterPreRoutingHookResponsec                       e Zd ZdZdS )RoutingArgs<   N)__name__
__module____qualname__ttl     9/usr/local/lib/python3.11/site-packages/litellm/router.pyru   ru      s        
CCCr|   ru   c            U          e Zd ZU g Zeed<   dZee         ed<   dZ	e
ed<   dZdZee         ed<   dZee         ed<   dZeeeeeef                           ed	<   dddddddi dddddddddddd
dg g g i ddddi ddddddi dd e            df)deeee         eeeef                  f                  dee         dee         dee         dee
         dee         dee         dedeee                  de
dee         dee
         dee
         dee
         dee         dee         dee         dee
         deded          d!eee                  d"ed#ed$ed%eeeeeef         f                  d&ed'ed(e
d)eeeef                  d*eeef         d+ee
         d,ee          d-ee         d.ee         d/ed0         d1ee!         d2ed3ee"         d4ee#         d5ee         d6ed7dfTd8Z$d9 Z%d: Z&e'd;eeef         d7ee(e)f         fd<            Z*d=e(fd>Z+d/ee,ef         d2efd?Z-d@ Z.dA Z/dBee         fdCZ0d1ee!         fdDZ1dEefdFZ2dGedHeeeef                  d7ee3e4f         fdIZ5dGedHeeeef                  d7ee3e4f         fdJZ6e7dGedHee8         dKedL         d7e4fdM            Z9e7	 ddGedHee8         dKed         d7e3fdN            Z9e7	 ddGedHee8         dKeedL         ed         f         d7ee4e3f         fdO            Z9	 ddGedHee8         dKefdPZ9dQe4dHeeeef                  dRed7e4fdSZ:dGedHeeeef                  d7ee3e4f         fdTZ;	 ddGedVedWee         d7dfdXZ<	 ddVedWee         d7dfdYZ=	 ddEedVedZee         d7e>fd[Z?	 ddEedVedZee         d7dfd\Z@dEedVefd]ZAdVed^ed7eeee
f                  fd_ZBdVed^ed7eeee
f                  fd`ZCdVed^ed7eeee
f                  fdaZDdbee         dHeeeeef                  eeeeef                           f         fdcZEdGedHeee8                  fddZFe7dGedHeeeef                  dKedL         d7e4fde            ZGe7	 ddGedHeeeef                  dKed         d7e3fdf            ZG	 ddGedHeeeef                  dKefdgZGe7	 ddGedHee8         dhe
dKed         d7e3f
di            ZHe7dGedHee8         dhe
dKedL         d7e4f
dj            ZH	 ddGedHee8         dhe
fdkZHdGedhe
dledmeIednf         dVeeef         f
doZJdGed7efdpZKdGedHee8         dVeeef         fdqZLdredGefdsZMdredGefdtZNdredGefduZOdredGefdvZPdweQdGefdxZRdweQdGefdyZSdGedzed{efd|ZTdGefd}ZUdGefd~ZV	 	 	 ddGedredee         dee         dee         f
dZW	 	 	 ddGedredee         dee         dee         f
dZXdGedrefdZY	 	 	 ddedGedee         dee         dee         f
dZZdedGefdZ[dGedlefdZ\dGedefdZ]dGedlefdZ^	 ddGedzeeef         dee         d7e_fdZ`dzeeef         dGefdZa	 d dGedzeeef         dee         d7e_fdZbdzeeef         dGefdZcdGed7edfdZedGed7edfdZfdGed7egfdZhdGed7egfdZi	 ddGee         d7egfdZjdGefdZk	 ddledee         fdZl	 d!dleded         fdZm	 ddledee         fdZndlefdZo	 	 d"dledee         deep         fdZqderdee         d"ee         d#ee         d$ee         dee         dmedVefdZs etju                    d             Zv	 	 	 	 d#dVedee         d"ee         d#ee         d$ee         f
dZw etju                    d             ZxdlefdZy	 ddVedee         fdZz	 	 	 	 	 d$derdee         dee         d#ee         d$ee         dee         fdZ{d Z|	 dd"eeeee         f                  dee         d7eee                  fdZ}	 	 d"derde
de
dee         dee         d7ee
ef         fdZ~d Zd7ee         fdZd7efdZdee         fdZdVed7ed         fdZdVederd7efdZdedee         d7e
fdZd7efdZdGede3dVed7efdZdGedee         fdZdGedee         d7eIee         ee         f         fdZdEefdZ	 ddEedee         dee         fdĄZ	 	 d"dGedee         dHeee8                  dee         dee         dee         fdƄZdedefdȄZdedededed7ee>         f
d̈́Zded7efd΄ZdEe>fdτZdEe>d7efdЄZdefdфZdEe>d7e>fd҄ZdEe>dedGefdӄZdEe>d7ee>         fdԄZdede
d7dfdׄZ	 ddGedee         d7dfd؄ZdEe>d7ee>         fdلZded7ee>         fdۄZded7ee>         fd܄Zded7ee         fd݄Zded7ee>         fd߄Ze7	 ddEededdd7efd            Ze7dEddeded7efd            Z	 ddEee         dedee         d7efdZded7ee         fdZded7ee         fdZdeded7ee         fdZdeded7ee         fdZded7ee         fdZded7eIee
         ee
         f         fdZ ee          ded7ee         fd            Zded7eee
f         fdZ	 ddedee         d7efdZded7dfdZdefdZ	 d%dee         ded7ee         fdZded7efdZdeded7ee         fdZ	 ddedGedee         d7efdZ	 	 d"dedee         dee         d7ee         fdZddee         d7ee         fdZ	 ddEedee         d7ee         fdZdee         d7efdZ	 ddee         d7ee         fd Z	 d"dee         dee         d7eee                  fdZ	 	 	 d&dee         dee         dee         d7eeee         f         fdZded7efdZd Zd ZddZ	 ddGededHeeeef                  dee         fdZdGed7ee         fd	ZdGed7efd
Z	 	 	 	 d'dGedHeeeeef                           dzeeeef                  dee         dee         d7eIeeeef         f         fdZ	 	 	 	 d'dGededHeeeeef                           dzeeeef                  dee         dee         d7eee         ef         fdZ	 	 	 d(dGededHeeeeef                           dzeeeef                  dee         f
dZ	 	 	 d(dGededHeeeeef                           dzeeeef                  dee         d7ee         fdZ	 	 	 	 d'dGedHeeeeef                           dzeeeef                  dee         dee         f
dZdee         dee         d7ee         fdZ	 ddee         fdZ	 dderdee         fdZ̐derfdZ͐d ZΐdefdZАd Zѐd ZdS ()  Routermodel_namesFcache_responsesi  default_cache_time_secondsNleastbusy_loggerlowesttpm_loggeroptional_callbacksINFOr   simple-shuffle
model_listassistants_config	redis_url
redis_host
redis_portredis_passwordcache_kwargscaching_groups
client_ttlpolling_intervaldefault_prioritynum_retriesmax_fallbackstimeoutstream_timeoutdefault_litellm_paramsdefault_max_parallel_requestsset_verbosedebug_level)DEBUGr   default_fallbacks	fallbackscontext_window_fallbackscontent_policy_fallbacksmodel_group_aliasenable_pre_call_checksenable_tag_filteringretry_afterretry_policymodel_group_retry_policyallowed_failsallowed_fails_policycooldown_timedisable_cooldownsrouting_strategy)r   
least-busyusage-based-routinglatency-based-routingcost-based-routingusage-based-routing-v2optional_pre_call_checksrouting_strategy_argsprovider_budget_configalerting_configrouter_general_settingsignore_invalid_deploymentsreturnc*                    || _         |)| _        || _        || _        || _        ddlm}*  |*            | _        dt          _	        | j         du rI|dk    rt          j        t          j                   n$|dk    rt          j        t          j                   |(pt                      | _        || _        g | _        i | _        d}+d},i }-|
| _        ||W|Ud}+|||-d	<   |||-d
<   |t+          |          |-d<   |||-d<   |-                    |           |                     |-          },|r0t          j        t          j        d d|+i|-t          _        || _        t7          |,t9                                | _        t;          ||,          | _        || _        d| _         || _!        g | _"        tG                      | _$        i | _%        i | _&        |pi | _'        i | _(        i | _)        |\| *                    |           | +                    |           | j,        | _-        |D ]"}.d|.d         v rd| j        |.d         d         <   #ng | _,        ||| _.        nt          j.        | _.        |!pt^          | _0        tc          | j        | j0                  | _2        |"| _3        t9                      | _4        ||| _5        n/t          j5        t          j5        | _5        ntl          j7        | _5        ||| _8        n/t          j8        t          j8        | _8        nt          j9        | _8        |pt          j:        | _;        || _<        || _=        |#| _>        |pt          j?        }/| @                    |/           |/| _?        |t          jA        <|pt          jA        }/| j?        | j?        B                    d|/i           n
d|/ig| _?        |pt          jC        | _C        |pt          jD        }0| @                    |0           |0| _D        t          t                    | _G        t          t                    | _H        t          t                    | _I        g | _J        |pi }t          jK        ||           | _L        || _M        | jM        N                    d|           | jM        N                    dd           | jM        N                    di                               d|	i           i | _O        	 | P                    |#|%           d| _Q        t          t          jS        t                    r%t          jU        V                    | jW                   n$t          jU        V                    | jW                   t          t          jX        t                    r%t          jU        Y                    | jZ                   n| jZ        gt          _X        t          t          j[        t                    r%t          jU        \                    | j]                   n| j]        gt          _[        t          t          j^        t                    r%t          jU        _                    | j`                   n| j`        gt          _^        |%| _a        |&| _b        d| _c        t          je        || jb                  r|$|$B                    d           ndg}$d| _f        |t          |t                    rt          d i || _f        nt          |t                    r|| _f        t          ji        dj                    | jf        k                    d                               || _l        d| _m        | t          | t                    rt          d i | | _m        nt          | t                    r| | _m        t          ji        dj                    | jm        k                    d                               |'| _o        |$| p                    |$           | jo        | q                                 | r                                 | s                                 | t                                 dS )!ah  
        Initialize the Router class with the given parameters for caching, reliability, and routing strategy.

        Args:
            model_list (Optional[list]): List of models to be used. Defaults to None.
            redis_url (Optional[str]): URL of the Redis server. Defaults to None.
            redis_host (Optional[str]): Hostname of the Redis server. Defaults to None.
            redis_port (Optional[int]): Port of the Redis server. Defaults to None.
            redis_password (Optional[str]): Password of the Redis server. Defaults to None.
            cache_responses (Optional[bool]): Flag to enable caching of responses. Defaults to False.
            cache_kwargs (dict): Additional kwargs to pass to RedisCache. Defaults to {}.
            caching_groups (Optional[List[tuple]]): List of model groups for caching across model groups. Defaults to None.
            client_ttl (int): Time-to-live for cached clients in seconds. Defaults to 3600.
            polling_interval: (Optional[float]): frequency of polling queue. Only for '.scheduler_acompletion()'. Default is 3ms.
            default_priority: (Optional[int]): the default priority for a request. Only for '.scheduler_acompletion()'. Default is None.
            num_retries (Optional[int]): Number of retries for failed requests. Defaults to 2.
            timeout (Optional[float]): Timeout for requests. Defaults to None.
            default_litellm_params (dict): Default parameters for Router.chat.completion.create. Defaults to {}.
            set_verbose (bool): Flag to set verbose mode. Defaults to False.
            debug_level (Literal["DEBUG", "INFO"]): Debug level for logging. Defaults to "INFO".
            fallbacks (List): List of fallback options. Defaults to [].
            context_window_fallbacks (List): List of context window fallback options. Defaults to [].
            enable_pre_call_checks (boolean): Filter out deployments which are outside context window limits for a given prompt
            model_group_alias (Optional[dict]): Alias for model groups. Defaults to {}.
            retry_after (int): Minimum time to wait before retrying a failed request. Defaults to 0.
            allowed_fails (Optional[int]): Number of allowed fails before adding to cooldown. Defaults to None.
            cooldown_time (float): Time to cooldown a deployment after failure in seconds. Defaults to 1.
            routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing", "cost-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
            routing_strategy_args (dict): Additional args for latency-based routing. Defaults to {}.
            alerting_config (AlertingConfig): Slack alerting configuration. Defaults to None.
            provider_budget_config (ProviderBudgetConfig): Provider budget configuration. Use this to set llm_provider budget limits. example $100/day to OpenAI, $100/day to Azure, etc. Defaults to None.
            ignore_invalid_deployments (bool): Ignores invalid deployments, and continues with other deployments. Default is to raise an error.
        Returns:
            Router: An instance of the litellm.Router class.

        Example Usage:
        ```python
        from litellm import Router
        model_list = [
        {
            "model_name": "azure-gpt-3.5-turbo", # model alias
            "litellm_params": { # params for litellm completion/embedding call
                "model": "azure/<your-deployment-name-1>",
                "api_key": <your-api-key>,
                "api_version": <your-api-version>,
                "api_base": <your-api-base>
            },
        },
        {
            "model_name": "azure-gpt-3.5-turbo", # model alias
            "litellm_params": { # params for litellm completion/embedding call
                "model": "azure/<your-deployment-name-2>",
                "api_key": <your-api-key>,
                "api_version": <your-api-version>,
                "api_base": <your-api-base>
            },
        },
        {
            "model_name": "openai-gpt-3.5-turbo", # model alias
            "litellm_params": { # params for litellm completion/embedding call
                "model": "gpt-3.5-turbo",
                "api_key": <your-api-key>,
            },
        ]

        router = Router(model_list=model_list, fallbacks=[{"azure-gpt-3.5-turbo": "openai-gpt-3.5-turbo"}])
        ```
        r   )ServiceLoggingTr   r   localNredisurlhostportpasswordtype)redis_cachein_memory_cache)r   r   modellitellm_params)cachedefault_cooldown_time)fallback_param*)params
router_objr   max_retriesmetadatar   r   r   )r   r   router_budget_limitingz+[32mRouter Custom Retry Policy Set:
{}[0mexclude_nonez3[32mRouter Custom Allowed Fails Policy Set:
{}[0mr{   )ur   r   r   r   r   litellm._service_loggerr   service_logger_objlitellmsuppress_debug_infor   setLevelloggingr   r   rY   r   r   deployment_namesdeployment_latency_mapr   strupdate_create_redis_cacher   Cacher   r   r   rD   	schedulerr   default_deploymentr   provider_default_deployment_idsrp   pattern_routerteam_pattern_routersauto_routersr    model_id_to_deployment_index_map model_name_to_deployment_indices'_build_model_id_to_deployment_index_mapset_model_listr   healthy_deploymentsr   r4   r   r3   cooldown_cacher   failed_callsr   openaiDEFAULT_MAX_RETRIESr   ROUTER_MAX_FALLBACKSrequest_timeoutr   r   r   r   r   validate_fallbacksr   appendr   r   r   inttotal_calls
fail_callssuccess_callsprevious_modelsChatchatr   
setdefaultdeployment_statsrouting_strategy_initaccess_groups
isinstance_async_success_callbacklistlogging_callback_manager"add_litellm_async_success_callbackdeployment_callback_on_successsuccess_callbackadd_litellm_success_callback#sync_deployment_callback_on_success_async_failure_callback"add_litellm_async_failure_callback$async_deployment_callback_on_failurefailure_callbackadd_litellm_failure_callbackdeployment_callback_on_failurer   r   router_budget_loggerr#   !should_init_router_budget_limiterr   dictrW   infoformat
model_dumpr   r   rL   r   add_optional_pre_call_checks_initialize_alertinginitialize_assistants_endpointinitialize_router_endpointsapply_default_settings)1selfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   
cache_typer   cache_configm
_fallbacks_content_policy_fallbackss1                                                    r}   __init__zRouter.__init__   s	   h '*D'&&<#$8!::::::2@.2B2B&*#t##f$$%.w|<<<<''%.w}===#>'<'>'> 	$ "3 	 ')#  	 ')$ Z%;
@V J$&/U#%'1V$%'*:V$)+9Z( ---22<@@K 	3}$ ' N N: N N N#2D #]__
 
 


 #-;
 
 
 !1"&-J*:<,022  	! 68 # 	
 AC- GI-!88DDD
+++-1_D$ R Ra 0111PQD/2B0CG0LMR
  O $!.D!(!6D*K.K+*D4F
 
 
 "3OO 	 "*D ,&2D%9D$!.D".!(!6D!(!=D9'"9,& 0 3'"3
z:::#(G,E,Q*Gg.GJ~)%%sJ&78888#&
"3!4 %H(H 	%
 %H(H 	" 	/HIII(A%(3)
 )
 (3(
 (
 +6+
 +
  	
 "8!=2L(>4PPP	 '=##..y'BBB#..}a@@@#..z2>>EE~.	
 	
 	
 ')	 	""-"7 	# 	
 	
 	
 "g5t<< 	,OO3    ,OO3   g.55 	R,II8    )-(P'QG$g5t<< 	,OO9   
 9/G+ g.55 	M,II3    )-(K'LG$%:"&<#DH!A!$:U
 
 
 	F (3(//0HIIII,D+E(37#,-- 1$/$?$?,$?$?!!L+66 1$0!!&DKK%00d0CC    % 	% CG!+.55 A,>,V,VAU,V,V))02DEE A,@)!&LSS-88d8KK    :I#/--.FGGG+%%'''++---((***##%%%%%r|   c                 4    g }|                      |           dS )z;
        Apply the default settings to the router.
        N)r  )r  default_pre_call_checkss     r}   r  zRouter.apply_default_settingsl  s$    
 :<))*ABBBtr|   c                    t           j                            t           j        |            t           j                            t           j        |            t           j                            t           j        |            t           j                            t           j        |            t           j                            t           j        |            t           j                            t           j        |            t           j                            t           j	        |            | j
        6| j
        D ]0}t           j                            t           j	        |d           /dS dS )z
        Pseudo-destructor to be invoked to clean up global data structures when router is no longer used.
        For now, unhook router's callbacks from all lists
        NF)require_self)r   r   #remove_callback_from_list_by_objectr   r   r  r  input_callbackservice_callback	callbacksr   )r  callbacks     r}   discardzRouter.discardu  sd   
 	(LL+T	
 	
 	
 	(LL$d	
 	
 	
 	(LL+T	
 	
 	
 	(LL$d	
 	
 	
 	(LL"D	
 	
 	
 	(LL$d	
 	
 	
 	(LLt	
 	
 	

 ". 3  0TT%xe U     /. r|   r  c                 \    |                      d          rt          di | S t          di | S )za
        Initializes either a RedisCache or RedisClusterCache based on the cache_config.
        startup_nodesNr{   )getr   r   )r  s    r}   r   zRouter._create_redis_cache  sB     O,, 	.$44|444-----r|   r   c                 :    | j         j        || j         _        dS dS )z
        Update the redis cache for the router, if none set.

        Allows proxy user to just do
        ```yaml
        litellm_settings:
            cache: true
        ```
        and caching to just work.
        N)r   r   )r  r   s     r}   _update_redis_cachezRouter._update_redis_cache  s'     :!)%*DJ""" *)r|   c                 $   t          j        d|            |t          j        j        k    s|t          j        k    rt          | j                  | _        t          t          j
        t                    r%t          j
                            | j                   n| j        gt          _
        t          t          j        t                    r&t          j                            | j                   d S d S |t          j        j        k    s|t          j        k    rbt#          | j        |          | _        t          t          j        t                    r&t          j                            | j                   d S d S |t          j        j        k    s|t          j        k    rbt)          | j        |          | _        t          t          j        t                    r&t          j                            | j                   d S d S |t          j        j        k    s|t          j        k    rbt/          | j        |          | _        t          t          j        t                    r&t          j                            | j                   d S d S |t          j        j        k    s|t          j        k    rbt5          | j        i           | _        t          t          j        t                    r&t          j                            | j                   d S d S d S )NzRouting strategy: )router_cache)r+  routing_args)r   r  r]   
LEAST_BUSYvaluer$   r   r   r   r   r   r   r   r"  r   add_litellm_callbackUSAGE_BASED_ROUTINGr'   r   USAGE_BASED_ROUTING_V2r(   lowesttpm_logger_v2LATENCY_BASEDr&   lowestlatency_logger
COST_BASEDr%   lowestcost_logger)r  r   r   s      r}   r   zRouter.routing_strategy_init  s    	"#J8H#J#JKKK : @@@?#===$;!Z% % %D! '0$77 A&--d.CDDDD*.*?)@&'+T22 ]0EEdF[\\\\\] ]  C III?#FFF$;!Z2% % %D! '+T22 ]0EEdF[\\\\\] ]  F LLL?#III'A!Z2( ( (D$ '+T22 `0EEdF^_____` `  = CCC?#@@@(C!Z2) ) )D% '+T22 a0EEdF_`````a a  : @@@?#===%=!Z& & &D" '+T22 ^0EEdF\]]]]]^ ^ Dr|   c                 F   |                      t          j                  | _        |                      t          j                  | _        |                      t          j                  | _        |                      t          j                  | _        |                      t          j                  | _        |                      t          j                  | _        |                      t          j                  | _        |                      t          j	                  | _	        d S N)
factory_functionr   acreate_assistantsadelete_assistantaget_assistantsacreate_threadaget_threada_add_messageaget_messagesarun_threadr  s    r}   r  z%Router.initialize_assistants_endpoint  s    "&"7"78R"S"S!%!6!6w7P!Q!Q#44W5LMM"33G4JKK001DEE!2273HII!2273HII001DEEr|   c                    |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j	        d          | _	        |                      t          j
        d	          | _
        |                      t          j        d
          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        |                      t          j        d          | _        ddlm}m}m}m} |                      |d          | _        |                      |d          | _        |                      |d          | _        |                      |d          | _        ddl m}m!}m"}m#} |                      |d          | _        |                      |d          | _"        |                      |d          | _!        |                      |d          | _#        d S ) N
moderation)	call_typeanthropic_messagesagenerate_contentaadapter_generate_content
aresponsesafile_deleteafile_content	responsesaget_responsesacancel_responsesadelete_responsesalist_input_items
_arealtimeacreate_fine_tuning_jobacancel_fine_tuning_jobalist_fine_tuning_jobsaretrieve_fine_tuning_jobalist_filesaimage_editallm_passthrough_router   )acreateasearchcreatesearchavector_store_searchavector_store_createvector_store_searchvector_store_create)rG  agenerate_content_streamgenerate_contentgenerate_content_streamrb  ra  rc  )$r9  r   amoderationrF  aanthropic_messagesrG  rH  rI  rJ  rK  rL  rM  rN  rO  rP  rQ  rR  rS  rT  rU  
afile_listrW  rX  litellm.vector_stores.mainrY  rZ  r[  r\  r]  r^  r_  r`  litellm.google_genaira  rb  rc  )	r  rY  rZ  r[  r\  rG  ra  rb  rc  s	            r}   r  z"Router.initialize_router_endpoints  s   00< 1 
 
 $(#8#8&2F $9 $
 $
  "&!6!6%1D "7 "
 "
 *.)>)>-9T *? *
 *
& //, 0 
 
 !11 N 2 
 
 "22!_ 3 
 
 ..w/@K.XX"33".> 4 
 
 "&!6!6%1D "7 "
 "
 "&!6!6%1D "7 "
 "
 "&!6!6%1D "7 "
 "
 //, 0 
 
 (,'<'<+7P (= (
 (
$ (,'<'<+7P (= (
 (
$ '+&;&;*6N '< '
 '
# *.)>)>-9T *? *
 *
& //- 0 
 
  00= 1 
 
 '+&;&;*6N '< '
 '
# 	POOOOOOOOOOO %)$9$95 %: %
 %
! %)$9$95 %: %
 %
! $(#8#83 $9 $
 $
  $(#8#83 $9 $
 $
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 	
 "&!6!6)< "7 "
 "
 !% 5 5(: !6 !
 !
 )-(=(=$0J )> )
 )
% (,'<'<#/H (= (
 (
$$$r|   r   c           	          |dS |D ]`}t          |t                    st          d| d          t          |          dk    r#t          d| dt          |           d          adS )z3
        Validate the fallbacks parameter.
        NzItem 'z' is not a dictionary.ro   zDictionary 'z%' must have exactly one key, but has z keys.)r   r  
ValueErrorlen)r  r   fallback_dicts      r}   r   zRouter.validate_fallbacksc  s     !F+ 	 	MmT22 Q !O-!O!O!OPPP=!!Q&& q=qqWZ[hWiWiqqq   '	 	r|   c                 ^   ||D ]}d }|dk    rt          | j                  }n<|dk    r"t          | j        | j        | j                  }n|dk    rt                      }|G| j        g | _        | j                            |           t          j	        
                    |           d S d S )Nprompt_caching)r   r   )
dual_cacher   r   responses_api_deployment_check)r?   r   r#   r   r   r@   r   r   r   r   r/  )r  r   pre_call_check	_callbacks       r}   r  z#Router.add_optional_pre_call_checksq  s     $/": U U48	!%555 <4: N N NII#'??? 4#':/3/J#'?! ! !II
 $'GGG ; = =I(.624/+229===4II)TTT# 0/U Ur|   
deploymentc                     	 t          j        |          }|d         }d|v r|d         dd         dz   |d<   |S # t          $ r+}t          j        dt          |                      |d}~ww xY w)z
        returns a copy of the deployment with the api key masked

        Only returns 2 characters of the api key and masks the rest with * (10 *).
        r   api_keyN   z
**********z+Error occurred while printing deployment - )copydeepcopy	Exceptionr   debugr   )r  rs  _deployment_copyr   es        r}   print_deploymentzRouter.print_deployment  s    
	#}Z88#34D#ENN**,:9,Ebqb,IH,Ty)## 	 	 	!'Fc!ffFF   G		s   7: 
A/&A**A/r   messagesc                     	 t          j        d| d           ||d<   ||d<   | j        |d<   |                     ||            | j        di |}|S # t
          $ r}|d}~ww xY w)	z
        Example usage:
        response = router.completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey, how's it going?"}]
        zrouter.completion(model=z,..)r   r~  original_functionr   kwargsNr{   )r   rz  _completion_update_kwargs_before_fallbacksfunction_with_fallbacksry  )r  r   r~  r  responser|  s         r}   
completionzRouter.completion  s    
	!'(N5(N(N(NOOO#F7O!)F:*.*:F&'00uV0LLL3t3==f==HO 	 	 	G	s   AA 
A%A  A%c           	      $   d }	 |                      |||                    dd           |          }|                     ||           |d                                         }|d         }|                     ||          }|                    dd           }||||j        k    rd }	n|}	|                     |          s|                     |           t          j
        di i ||| j        |	d|}
t          j        d	| d
           t          |
t                    r1|                     ||
|          }|rt          j        d|d          |
S # t$          $ r/}t          j        d	| dt'          |           d           |d }~ww xY w)Nspecific_deploymentr   r~  r  request_kwargsrs  r  r   r   ru  rs  r~  cachingclientzlitellm.completion(model=)[32m 200 OK[0mr   r  r  Response output was blocked. messager   llm_provider)[31m Exception [0mr{   )get_available_deploymentpop_update_kwargs_with_deploymentrw  _get_clientr'  ru  has_model_id routing_strategy_pre_call_checksr   r  r   r   r  r   rg   "_should_raise_content_policy_errorContentPolicyViolationErrorry  r   )r  r   r~  r  
model_namers  datapotential_model_clientdynamic_api_keymodel_clientr  _should_raiser|  s                r}   r  zRouter._completion  s?    
=	66!$*JJ/Dd$K$K%	 7  J //:f/UUU./4466DgJ%)%5%5%f &6 & &" %jjD99O+*6#'='EEE#5 $$U++ M555LLL)   (#3*	  
  H "&OJOOO  
 (M22 	 $ G G(6 !H ! ! ! != >#%'    O 	 	 	!&[J[[CPQFF[[[   G		s   EE 
F *F

FstreamTc                 
   K   d S r8  r{   r  r   r~  r  r  s        r}   acompletionzRouter.acompletion         	r|   c                 
   K   d S r8  r{   r  s        r}   r  zRouter.acompletion  r  r|   c                 
   K   d S r8  r{   r  s        r}   r  zRouter.acompletion  r  r|   c                   K   	 ||d<   ||d<   ||d<   | j         |d<   |                     ||           |                    d          p| j        }t	          j                    }|                     |          }|r|                     |||           d {V S |)t          |t                    r | j	        di | d {V }n | j
        di | d {V }t	          j                    }	|	|z
  }
t          j        | j                            t          j        |
d||	t#          |          	                     |S # t$          $ r=}t          j        t'          | |t)          j                    |
                     |d }~ww xY w)Nr   r~  r  r  r  priority)r   r~  r  r  servicedurationrE  
start_timeend_timeparent_otel_spanlitellm_router_instancer  error_traceback_stroriginal_exceptionr{   )_acompletionr  r'  r   time_is_prompt_management_model_prompt_management_factoryr   r   schedule_acompletionasync_function_with_fallbacksasynciocreate_taskr   async_service_success_hookr^   ROUTERr   ry  r>   	traceback
format_exc)r  r   r~  r  r  request_priorityr  r  r  r  	_durationr|  s               r}   r  zRouter.acompletion  s     ,	#F7O!)F:%F8*.*;F&'00uV0LLL%zz*55N9NJ*.*J*J5*Q*Q'* !<<%! =         
  +
;KS0Q0Q+!:!:!D!DV!D!DDDDDDD!C!C!M!Mf!M!MMMMMMMy{{H :-I'BB(/&+)%%Fv%N%N C  	 	 	 O 		 		 		(,0#)(1(<(>(>'(	     G		s   BD9 B D9 9
F 8E;;F model_responseinitial_kwargsc                 ~    K   ddl m  G fddt                    } fd} | |                      S )z
        Helper to iterate over a streaming response.

        Catches errors for fallbacks using the router's fallback system
        r   )MidStreamFallbackErrorc                   4     e Zd Zdef fdZd Zd Z xZS )ERouter._acompletion_streaming_iterator.<locals>.FallbackStreamWrapperasync_generatorc                 ~    t                                          |j        j        j                   || _        d S )N)completion_streamr   custom_llm_providerlogging_obj)superr  r   r  r  _async_generator)r  r  	__class__r  s     r}   r  zNRouter._acompletion_streaming_iterator.<locals>.FallbackStreamWrapper.__init__K  sI      &5(.(6(J . :	 !    )8%%%r|   c                     | S r8  r{   rB  s    r}   	__aiter__zORouter._acompletion_streaming_iterator.<locals>.FallbackStreamWrapper.__aiter__U  s    r|   c                 D   K   | j                                          d {V S r8  )r  	__anext__rB  s    r}   r  zORouter._acompletion_streaming_iterator.<locals>.FallbackStreamWrapper.__anext__X  s-      !2<<>>>>>>>>>r|   )rw   rx   ry   r   r  r  r  __classcell__)r  r  s   @r}   FallbackStreamWrapperr  J  sh        8 8 8 8 8 8 8 8  ? ? ? ? ? ? ?r|   r  c                   K   	 2 3 d {V } | W V  6 d S # $ r0}ddl m}  |j                  }t          t          t
                   t          |dd                     }	 t          t                              d                    }                    dj	                  }                    dj
                  }                    dj                  }j        d	<   d
ddd|j        ddgz   d<                       |                               |d||||d           d {V }	t!          |	d          r|	2 3 d {V }
|
rt#          |
t$                    rt!          |
d          rddlm} t          t          t
                   t          |
dd                     }||g}ng }|,t!          |d          r|j        |                    |           |                    |          }t1          |
d|           |
W V  6 nd W V  n+# t2          $ r}t5          j        d|            |d }~ww xY wY d }~d S d }~ww xY w)Nr   )stream_chunk_builder)chunksusager   r   r   r   r  systema  You are a helpful assistant. You are given a message and you need to respond to it. You are also given a generated content. You need to respond to the message in continuation of the generated content. Do not repeat the same content. Your response should be in continuation of this text: rolecontent	assistantT)r  r  prefixr~  r  Fr{   )r|  disable_fallbacksr   r   r   model_groupargsr  r  )BaseTokenUsageProcessor)usage_objectszFallback also failed: )litellm.mainr  r  r   r   rd   getattrr   r'  r   r   r   r  generated_contentr  *async_function_with_fallbacks_common_utilshasattrr   rb   litellm.cost_calculatorr  r  r   combine_usage_objectssetattrry  r   error)itemr|  r  complete_response_objectcomplete_response_object_usager  r   r   r   fallback_responsefallback_itemr  r  r  combined_usagefallback_errorr  r  r~  r  r  s                   r}   stream_with_fallbackszERouter._acompletion_streaming_iterator.<locals>.stream_with_fallbacks[  s     a)"0       $JJJJJ #1..) ^) ^) ^)======+?+?)0, , ,( 26UO4gtDD2 2.T)"&sN,>,>w,G,G"H"HK0>0B0B#T^1 1I @N?Q?Q2D4Q@ @, @N?Q?Q2D4Q@ @, ;?:KN#6719$, (I 
 %0'(':&* 
= 
2N:. 88). 9    #MM.3&/5M5M(3!##1 N 	 	 	 	 	 	 	 	 & 0+>> $#3D  0  0  0  0  0  0  0- -P$.}>Q$R$RP %,M7$C$CP
!" !" !" !" !" !" )-$,UO$+M7D$I$I)" )" $)#45:GMM46M %C$N(/0NPW(X(X %O(F(L(X$1$8$89W$X$X$X %<$Q$Q6C %R %& %& !/
 !(w O O O"//////A 4E3DF #



  ) ) ))/AAA   )()      s^)sO     IAI$C%H	HCHI
H?!H::H??II)litellm.exceptionsr  re   )r  r  r~  r  r  r  r  s   ````  @r}   _acompletion_streaming_iteratorz&Router._acompletion_streaming_iterator=  s       	>=====	? 	? 	? 	? 	? 	? 	?$7 	? 	? 	?"b	) b	) b	) b	) b	) b	) b	) b	) b	)H %$%:%:%<%<===r|   c                   K   d}i }	 |                                 }||d<   t          |          }t          j                    }|                     |||                    dd          |           d{V }	|	}t          j                    }
|
|z
  }t          j        | j                            t          j
        |d||
t          |                               |                     |	|           |                     |	|           |	d	                                          }|d         }|                     |	|          }| j        |xx         d
z  cc<   i ||| j        |d|}t!          j        di |}|                    dd          }|                     |	|d          }|nt)          |t
          j                  rT|4 d{V  	 |                     |	||           d{V  | d{V }ddd          d{V  n# 1 d{V swxY w Y   n&|                     |	||           d{V  | d{V }t)          |t.                    r1|                     |||          }|rt!          j        d|d          | j        |xx         d
z  cc<   t7          j        d| d           |                     |	||           t)          |t:                    r|                     |||           d{V S |S # t           j        $ rq}|                    d	i                               dd          }|                    d	i                               dd          }|xj         d| d| z  c_         |d}~wtB          $ rF}t7          j        d| dtE          |           d           || j#        |xx         d
z  cc<   |d}~ww xY w)z
        - Get an available deployment
        - call it with a semaphore over the call
        - semaphore specific to it's rpm
        - in the semaphore,  make a check against it's local rpm before running
        Nr   r  r  async_get_available_deploymentr  rs  r  r  r   ro   r  litellm_logging_objmax_parallel_requestsrs  r  client_type)rs  r  r  r  r  r  r  zlitellm.acompletion(model=r  )rs  r  r  )r  r~  r  r   r   z$

Deployment Info: request_timeout: z

timeout: r  r  r{   )$rw  r   r  r   r  r  r  r   r  r^   r  _track_deployment_metricsr  _get_async_openai_model_clientr   r   r   r  r'  r  r   	Semaphore&async_routing_strategy_pre_call_checksrg   r  r  r   r   r  re   r  Timeoutr  ry  r   r   )r  r   r~  r  r  _timeout_debug_deployment_dict#input_kwargs_for_streaming_fallbackr  r  rs  r  r  r  r  input_kwargs	_responser  rpm_semaphorer  r  r|   deployment_request_timeout_paramdeployment_timeout_params                          r}   r  zRouter._acompletion  s&      
 	'C	28++--/;@/8@HHJ#BB!$*JJ/Dd$K$K%	  C          J .8*y{{H :-I'BB(/&>)%%Fv%N%N C  	 	 	 **%8H +    //:f/UUU./4466DgJ>>% ?  L Z(((A-((($/&	  
 L  +;;l;;I4:JJ%t5 5K !,,%3 -  M
 (Zw0. .( ) 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ EE#-$/)9 F         
 &/H
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ 
/ AA) +%5 B          "+?????? (M22 	 $ G G(6 !H ! ! ! != >#%'    z***a/***!&PZPPP   **%!!1 +    ($788 !AA#+%#F B          O 	 	 	/M/Q/Q "0 0c#T** - (F'I'I "( (c)T"" % II  JBb  J  J  pH  J  J  JIIG 	 	 	!&\Z\\SQRVV\\\   %
+++q0+++G	sQ   GK; 	(H1K; 
HK; HC'K; 9K; ;O	
A,M66O	AOO	r   r  metadata_variable_namec                 6   |                     d| j                  |d<   |                    dt          t	          j                                         d}|                     |          r|}|                    |i                               ||d           dS )zm
        Adds/updates to kwargs:
        - num_retries
        - litellm_trace_id
        - metadata
        r   litellm_trace_idNr   )r  r   )r'  r   r   r   r   uuid4_get_model_from_aliasr   )r  r   r  r  r   s        r}   r  z&Router._update_kwargs_before_fallbacksS  s     !'

=$:J K K},c$*,,.?.?@@@+/%%E%22 	& %0"55<<!8IJJ	
 	
 	
 	
 	
r|   c                     | j                                         }|                    di           pi }|                                D ]\  }}||                    ||           |                    |i                               |           dS )z
        Adds default litellm params to kwargs, if set.

        Handles inserting this as either "metadata" or "litellm_metadata" depending on the metadata_variable_name
        r   N)r   rw  r  itemsr   r   )r  r  r  defaultsmetadata_defaultskeyr.  s          r}   *_update_kwargs_with_default_litellm_paramsz1Router._update_kwargs_with_default_litellm_paramsh  s     .3355$LLR88>B #..** 	* 	*JC}c5)))) 	0"55<<=NOOOOOr|   function_namec                    |                     di                                           }|d                                         }t          ||          }t          |          }|                     |i                                d          }|                     ||          }	|                     d          }
|	|d<   |
|d<   t          |t          di ||	          }|                     |
           |S )z.
        Handle clientside credential
        
model_infor   )r   r  r  r  )r  r   idoriginal_model_idr  r   r   r  r{   )r'  rw  r1   r-   _generate_model_idrQ   rS   upsert_deployment)r  rs  r  r  r   r   dynamic_litellm_paramsr  r  	_model_idr#  deployment_pydantic_objs               r}   _handle_clientside_credentialz$Router._handle_clientside_credential}  s3     ^^L"55::<<
#$45::<<!;)&"
 "
 "
 "D'"
 "
 "
 jj!7<<@@OO++#4J , 
 
	 'NN400$
4*;
&'",")CC,BCC!#
 #
 #

 	. 	 	
 	
 	
 '&r|   c                 ^   |                     di                                           }|d         d         }|d                              d          }|d         }t          |          rI|                     |||          }|j                                        }|j        j        }|j        j        }t          |          }	|
                    |	i                               ||||d	           ||d<   |                     ||d         
          |d<   |                     ||	           dS )z
        2 jobs:
        - Adds selected deployment, model_info and api_base to kwargs["metadata"] (used for logging)
        - Adds default litellm params to kwargs, if set.
        r   r   r   api_baser  )r  rs  r  r  r!  )rs  r   r,  deployment_model_namer  r  r   )r  r  N)r'  rw  r2   r*  r   r  r   r   r,  r-   r   r   _get_timeoutr  )
r  rs  r  r  r   deployment_litellm_model_namedeployment_api_baser.  r)  r  s
             r}   r  z%Router._update_kwargs_with_deployment  s|     ^^L"55::<<
(23C(DW(M%()9:>>zJJ *< 8#6::: 	R&*&H&H%fM 'I ' '# 1;FFHHJ,C,R,X)"9"H"Q!C'"
 "
 "
 	0"55<<;(/)>	 	
 	
 	
  *| --
+; < . 
 
y 	772H 	8 	
 	
 	
 	
 	
r|   c                     |                      ||d          }|                    dd          }||||j        k    rd}n|}|S )a  
        Helper to get AsyncOpenAI or AsyncAzureOpenAI client that was created for the deployment

        The same OpenAI client is re-used to optimize latency / performance in production

        If dynamic api key is provided:
            Do not re-use the client. Pass model_client=None. The OpenAI/ AzureOpenAI client will be recreated in the handler for the llm provider
        asyncr  ru  N)r  r'  ru  )r  rs  r  r  r  r  s         r}   r  z%Router._get_async_openai_model_client  sh     "&!1!1!&g "2 "
 "

 !**Y55'&2#9#AAALL1Lr|   r  c                     |                     dd          p7|                     dd          p!| j        p| j                             dd          S )z=Helper to get stream timeout from kwargs or deployment paramsr   N)r'  r   r   )r  r  r  s      r}   _get_stream_timeoutzRouter._get_stream_timeout  sd    
 JJ'.. Gxx $ G "	G
 *../?FF	
r|   c                     |                     dd          pc|                     dd          pM|                     dd          p7|                     dd          p!| j        p| j                             dd          }|S )zAHelper to get non-stream timeout from kwargs or deployment paramsr   Nr   )r'  r   r   r  r  r  r   s       r}   _get_non_stream_timeoutzRouter._get_non_stream_timeout  s    
 JJy$'' 	@zz+T22	@xx4 	@
 xx!4 	@ |	@ *..y$?? 	 r|   c                     d}|                     dd          r|                     ||          }||                     ||          }|S )z6Helper to get timeout from kwargs or deployment paramsNr  Fr/  )r'  r6  r9  r8  s       r}   r0  zRouter._get_timeout  sa    /3::h&& 	I..f4.HHG?22D 3  G r|   modelsc                 p   K   dt           dt          t                   f fd}dt           dt          t                   dt          f fd}t	          |t
                    rTt          d |D                       r;g }|D ] }|                     |d||d|           !t          j	        |  d{V }|S t	          |t
                    rt          d	 |D                       rg }t          |          D ])\  }	}
|D ]!}|                     |d||	|
d
|           "*t          j	        |  d{V }d t          t          |                    D             }|D ]Z}t	          |t                    r(||d                                      |d                    ?|d                             |           [|S dS dS )a6  
        Async Batch Completion. Used for 2 scenarios:
        1. Batch Process 1 request to N models on litellm.Router. Pass messages as List[Dict[str, str]] to use this
        2. Batch Process N requests to M models on litellm.Router. Pass messages as List[List[Dict[str, str]]] to use this

        Example Request for 1 request to N models:
        ```
            response = await router.abatch_completion(
                models=["gpt-3.5-turbo", "groq-llama"],
                messages=[
                    {"role": "user", "content": "is litellm becoming a better product ?"}
                ],
                max_tokens=15,
            )
        ```


        Example Request for N requests to M models:
        ```
            response = await router.abatch_completion(
                models=["gpt-3.5-turbo", "groq-llama"],
                messages=[
                    [{"role": "user", "content": "is litellm becoming a better product ?"}],
                    [{"role": "user", "content": "who is this"}],
                ],
            )
        ```
        r   r~  c                 h   K   	  j         d| |d| d{V S # t          $ r}|cY d}~S d}~ww xY wzs
            Wrapper around self.async_completion that catches exceptions and returns them as a result
            r   r~  Nr{   r  ry  r   r~  r  r|  r  s       r}   _async_completion_no_exceptionszARouter.abatch_completion.<locals>._async_completion_no_exceptions2  m      -T-WEHWWPVWWWWWWWWW       
1,11idxc                 p   K   	  j         d| |d| d{V |fS # t          $ r}||fcY d}~S d}~ww xY wr>  r@  )r   r~  rE  r  r|  r  s        r}   *_async_completion_no_exceptions_return_idxzLRouter.abatch_completion.<locals>._async_completion_no_exceptions_return_idx=  s|      *$*TTTVTTTTTTTT     #vs    
5055c              3   @   K   | ]}t          |t                    V  d S r8  )r   r  .0r  s     r}   	<genexpr>z+Router.abatch_completion.<locals>.<genexpr>P  s,      -T-TajD.A.A-T-T-T-T-T-Tr|   r?  Nc              3   @   K   | ]}t          |t                    V  d S r8  )r   r   rI  s     r}   rK  z+Router.abatch_completion.<locals>.<genexpr>W  s,      /V/V
1d0C0C/V/V/V/V/V/Vr|   )r   rE  r~  c                     g | ]}g S r{   r{   )rJ  _s     r}   
<listcomp>z,Router.abatch_completion.<locals>.<listcomp>b  s    /Q/Q/Qq/Q/Q/Qr|   ro   r   r{   )r   r   rE   r   r   r   allr   r  gather	enumeraterangerk  tuple)r  r;  r~  r  rB  rG  _tasksr   r  rE  r  rL  final_responsess   `            r}   abatch_completionzRouter.abatch_completion  sy     H				"&'7"8		 		 		 		 		 				+,	 	 	 	 	 	 	& h%% 	##-T-T8-T-T-T*T*T 	#F i i==gET\gg`fgghhhh$^V4444444HO$'' 	#C/V/VX/V/V/V,V,V 	#F )( 3 3  W#  EMMBB "'S7 FL     &nf5555555I/Q/QE#h--<P<P/Q/Q/QO% 8 8h.. 8#HQK077DDDD#A&--h7777""#	# 	# 	# 	#r|   c           	          K   dt           dt          t                   f fd}g }|D ] }|                     |d||d|           !t	          j        |  d{V }|S )a  
        Async Batch Completion - Batch Process multiple Messages to one model_group on litellm.Router

        Use this for sending multiple requests to 1 model

        Args:
            model (List[str]): model group
            messages (List[List[Dict[str, str]]]): list of messages. Each element in the list is one request
            **kwargs: additional kwargs
        Usage:
            response = await self.abatch_completion_one_model_multiple_requests(
                model="gpt-3.5-turbo",
                messages=[
                    [{"role": "user", "content": "hello"}, {"role": "user", "content": "tell me something funny"}],
                    [{"role": "user", "content": "hello good mornign"}],
                ]
            )
        r   r~  c                 h   K   	  j         d| |d| d{V S # t          $ r}|cY d}~S d}~ww xY wr>  r@  rA  s       r}   rB  z]Router.abatch_completion_one_model_multiple_requests.<locals>._async_completion_no_exceptions  rC  rD  r?  Nr{   )r   r   rE   r   r  rQ  )r  r   r~  r  rB  rU  message_requestr  s   `       r}   -abatch_completion_one_model_multiple_requestsz4Router.abatch_completion_one_model_multiple_requestsj  s      ,				"&'7"8		 		 		 		 		 		 ' 	 	OMM// / =C     !0000000r|   c                 
   K   d S r8  r{   r  s        r}   "abatch_completion_fastest_responsez)Router.abatch_completion_fastest_response  r  r|   c                 
   K   d S r8  r{   r  s        r}   r]  z)Router.abatch_completion_fastest_response  r  r|   c                 `   K   d |                     d          D             }dt          dt          t          t          t          f                  dt          dt
          dt          t          t          t          f         f
 fd}g d	t          j        ffd
}|D ]5}t          j         |d|||d|          }                    |           6rQt          j        t          j                   d{V \  }	|	D ]#}
 ||
           d{V }|d|j        d<   |c S $Qt          d          )z
        model - List of comma-separated model names. E.g. model="gpt-4, gpt-3.5-turbo"

        Returns fastest response from list of model names. OpenAI-compatible endpoint.
        c                 6    g | ]}|                                 S r{   )striprI  s     r}   rO  z=Router.abatch_completion_fastest_response.<locals>.<listcomp>  s     666!''))666r|   ,r   r~  r  r  r   c                    K   	  j         d| ||d| d{V }|S # t          j        $ r) t          j        d                    |                       t          $ r}|cY d}~S d}~ww xY w)zn
            Wrapper around self.acompletion that catches exceptions and returns them as a result
            r   r~  r  Nz4Received 'task.cancel'. Cancelling call w/ model={}.r{   )r  r  CancelledErrorr   rz  r  ry  )r   r~  r  r  resultr|  r  s         r}   rB  zRRouter.abatch_completion_fastest_response.<locals>._async_completion_no_exceptions  s      	/t/hehW]hhaghhhhhhhh)   %+JQQRWXX      s    A A*A%A*%A*taskc                   K   	 |  d {V }t          |t          t          f          rUt          j        d           D ]}|                                 |	                     |            S # t          $ r Y S w xY wn# t          $ r Y nw xY w	                     |            d S # t          $ r Y d S w xY w# 	                     |            w # t          $ r Y w w xY wxY w)Nz=Received successful response. Cancelling other LLM API calls.)	r   rg   re   r   rz  cancelremoveKeyErrorry  )rg  rf  tpending_taskss      r}   check_responsezARouter.abatch_completion_fastest_response.<locals>.check_response  sT     #f}6I&JKK ")/W   + # #



!!((....   D"    
!((.....   DD!((....   Dsr   AA? A..
A;:A;>B8 ?
B	B8 BB8 B' '
B54B58C :CC 
CC CC rd  )return_whenNT!fastest_response_batch_completionzAll tasks failedr{   )splitr   r   r
   boolr   r   rg   re   ry  r  Taskr  r   waitFIRST_COMPLETED_hidden_params)r  r   r~  r  r  r;  rB  rn  rg  donecompleted_taskrf  rm  s   `           @r}   r]  z)Router.abatch_completion_fastest_response  s      76U[[%5%5666		"&tCH~"6	@D	PS	="5y@A	 	 	 	 	 	" 	w| 	 	 	 	 	 	,  	' 	'E&// (6 EK  D
   &&&&  
	"(/7+B) ) ) # # # # # #D- #' " "-~n========%QUF)*MN!MMM &  
	" *+++r|   r  c                 
   K   d S r8  r{   r  r   r~  r  r  r  s         r}   r  zRouter.schedule_acompletion  r  r|   c                 
   K   d S r8  r{   rz  s         r}   r  zRouter.schedule_acompletion  r  r|   c                 ~  K   t          |          }t          t          j                              }t	          ||d          }| j                            |           d {V  t          j                    | j        z   }	t          j                    }
| j        j	        }d}|
|	k     r| 
                    ||           d {V \  }}| j                            |j        |j        |           d {V }|rn3t          j        |           d {V  t          j                    }
|
|	k     |r|	  | j        d|||d| d {V }|j                            di            |j        d                             d	d
i           |S # t(          $ r}t+          |d|           |d }~ww xY wt-          j        d|d          )Nzgpt-3.5-turbor  
request_idr  requestFr   r  r"  r  health_deploymentsrd  additional_headers%x-litellm-request-prioritization-usedTr  %Request timed out while polling queuer   r  r{   )r   r   r   r  rC   r   add_requestr  r   r   _async_get_healthy_deploymentspollr~  r  r  sleepr  rv  r   r   ry  r  r   r
  )r  r   r~  r  r  r  r  _request_idr  r  	curr_timepoll_intervalmake_request_healthy_deploymentsrN  r  r|  s                    r}   r  zRouter.schedule_acompletion
  s      =VDD$*,,''"&
 
 
 n(((666666666 9;;-IKK	7("",0,O,O.> -P - - ' ' ' ' ' '# ! "&!4!4??#7 "5 " "      L
  (mM222222222 IKK	 (""  	"2$"2 #(6# #EK# #      	 (334H"MMM()=>EE<dC   !    :x000 /?%   s   -AF 
F%F  F%r  r  .c                   K   t          |          }t          t          j                              }t	          |||          }| j                            |           d {V  t          j                    | j        z   }	t          j                    }
| j        j	        }d}|
|	k     r| 
                    ||           d {V \  }}| j                            |j        |j        |           d {V }|rn3t          j        |           d {V  t          j                    }
|
|	k     |r	  ||i | d {V }t!          |j        t$                    r=|j                            di            |j        d                             ddi           |S # t*          $ r}t-          |d	|           |d }~ww xY wt/          j        d
|d          )Nr}  r  Fr  r  r  r  Tr  r  r   r  )r   r   r   r  rC   r   r  r  r   r   r  r  r~  r  r  r  r   rv  r  r   r   ry  r  r   r
  )r  r   r  r  r  r  r  r  r  r  r  r  r  r  rN  r  r|  s                    r}   _schedule_factoryzRouter._schedule_factoryH  sq      =VDD$*,,''"
 
 
 n(((666666666 9;;-IKK	7("",0,O,O.> -P - - ' ' ' ' ' '# ! "&!4!4??#7 "5 " "      L
  (mM222222222 IKK	 (""  	
"3"3T"DV"D"DDDDDDD	i6== ,778LbQQQ,-ABII@$G   !    :x000 /?%   s   -A&F 
F6F11F6c                    |                      |          }|dS t          |          dk    rdS |d         d                             dd           }|dS d|v r+|                    d          d         }|t          j        v rdS dS )	Nr  Fro   r   r   r   /T)get_model_listrk  r'  rq  r   )_known_custom_logger_compatible_callbacks)r  r   r   litellm_modelsplit_litellm_models        r}   r  z"Router._is_prompt_management_model  s    ((E(::
5z??a5"1&67;;GTJJ 5-"/"5"5c":":1"="g&WWWtur|   c                 x  K   |                     dd           }|-t          di dt                      t                      d|\  }}t	          t
          |          }|                     |dddg|                    dd                     }|                     ||	           |d
         	                                }|                     dd           }|                     d          p|d
                              dd           }|                     d          p|d
                              dd           }	|                     dd           p|d
                              dd           }
|t          |t                    s"t          d| dt          |                     |	7t          |	t                    s"t          d|	 dt          |	                     |                    ||t!          |          ||	|
          \  }}}h dfd|                                D             }i |||}||d<   ||d<   ||d<   ||d<   |	|d<   |
|d<   |                     |          }|t'          |          dk    r,|                    d           t)          j        di | d {V S  | j        di | d {V S )Nr  r  )r  	rules_objr  userpromptr  r  r   r~  r  r  r   r   	prompt_idprompt_variablesprompt_labelz*Prompt ID is not set or not a string. Got=z, type=z2Prompt variables is set but not a dictionary. Got=)r  )r   r~  non_default_paramsr  r  r  >   r  r  prompt_versionbitbucket_configdotprompt_configr  c                 $    i | ]\  }}|v	||S r{   r{   )rJ  kvprompt_management_paramss      r}   
<dictcomp>z5Router._prompt_management_factory.<locals>.<dictcomp>  s1     
 
 
QQ6N-N-NAq-N-N-Nr|   r~  r  r   r  r{   )r'  ri   rh   rm   r   LiteLLMLoggingr  r  r  rw  r   r   rj  r   r  get_chat_completion_promptrk   r  r  rk  r   r  r  )r  r   r~  r  litellm_logging_objectprompt_management_deploymentr  r  r  r  r  optional_paramsfiltered_data_model_listr  s                 @r}   r  z!Router._prompt_management_factory  s      "(,A4!H!H!)-; . .)6!&"2"4"4  	. .*"F "&n6L!M!M'+'D'D%(;;< &

+@$ G G (E (
 (
$ 	++3F 	, 	
 	
 	
 ,,<=BBDD$//JJ{++ !/K0

#k4
 
  	 "::
 
 
)*:;??
 
 	
 zz.$77 $;W<

#nd
#
# 	 Jy#$>$>`Y``tT]``   '
;KT0R0R'vEUvv^bcs^t^tvv   #==@OOO-% > 
 
		
$
 $
 $
 
 
 
 
!ZZ\\
 
 
 @M?V??w%z(>$%'{%5!"!-~))U);;#k"2"2a"7"7JJ*+++ ,66v6666666667T7AA&AAAAAAAAAr|   r  c                    	 ||d<   ||d<   | j         |d<   |                    d| j                  |d<   |                    di                               d|i            | j        di |}|S # t          $ r}|d }~ww xY w)Nr   r  r  r   r   r  r{   )_image_generationr'  r   r   r   r  ry  r  r  r   r  r  r|  s         r}   image_generationzRouter.image_generation  s    
	#F7O%F8*.*@F&'$*JJ}d>N$O$OF=!j"--44mU5KLLL3t3==f==HO 	 	 	G	   A+A. .
A?8A::A?c           	         d}	 t          j        d| d|            |                     |dddg|                    dd                     }|                     ||	           |d
                                         }|                     ||	          }| j        |xx         dz  cc<   |                     |           t          j
        di i ||| j        |d|}| j        |xx         dz  cc<   t          j        d| d           |S # t          $ rF}	t          j        d| dt          |	           d           || j        |xx         dz  cc<   |	d }	~	ww xY w)Nr  #Inside _image_generation()- model: 
; kwargs: r  r  r  r  r  r  r   ro   r  r  r  r  zlitellm.image_generation(model=r  r  r  r{   )r   rz  r  r  r  rw  r  r   r  r   r  r   r   r  ry  r   r   )
r  r  r   r  r  rs  r  r  r  r|  s
             r}   r  zRouter._image_generation   s   
*	!'OeOOvOO   66#)h??@$*JJ/Dd$K$K 7  J
 //:f/UUU./4466D>>% ?  L
 Z(((A-((( 11Z1HHH/  $#3*	  
  H z***a/***!&U*UUU   O 	 	 	!&a*aaRUVWRXRXaaa   %
+++q0+++G	s   DD 
E AEE c           	      X  K   	 ||d<   ||d<   | j         |d<   |                    d| j                  |d<   |                     ||            | j        di | d {V }|S # t
          $ r=}t          j        t          | |t          j
                    |                     |d }~ww xY w)Nr   r  r  r   r  r  r{   )_aimage_generationr'  r   r  r  ry  r  r  r>   r  r  r  s         r}   aimage_generationzRouter.aimage_generation.  s      	#F7O%F8*.*AF&'$*JJ}d>N$O$OF=!00uV0LLL?T?II&IIIIIIIIHO 		 		 		(,0#)(1(<(>(>'(	     G		s   AA" "
B),8B$$B)c           	      2  K   |}	 t          j        d| d|            t          |          }|                     |dddg|                    dd           |           d {V }|                     ||           |d	                                         }|d
         }|                     ||          }| j        |xx         dz  cc<   t          j
        di i ||| j        |d|}	|                     ||d          }
|
mt          |
t          j                  rS|
4 d {V  	 |                     ||           d {V  |	 d {V }	d d d           d {V  n# 1 d {V swxY w Y   n%|                     ||           d {V  |	 d {V }	| j        |xx         dz  cc<   t          j        d| d           |	S # t&          $ rF}t          j        d| dt)          |           d           || j        |xx         dz  cc<   |d }~ww xY w)Nr  r  r  r  r  r  r  r  r   r   ro   r  r  r  r  z litellm.aimage_generation(model=r  r  r  r{   )r   rz  r   r   r  r  rw  r  r   r   r  r   r  r   r  r  r	  r   r  ry  r   r   )r  r  r   r  r  r  rs  r  r  r  r  r|  s               r}   r  zRouter._aimage_generationC  s     
D	!'OeOOvOO    AHH#BB#)h??@$*JJ/Dd$K$K%	  C          J //:f/UUU./4466DgJ>>% ?  L
 Z(((A-(((0  $#3*	  
  H !,,%3 -  M (Zw0. .( ) . . . . . . . . EE#-@P F          &.~~~~~~H. . . . . . . . . . . . . . . . . . . . . . . . . . . AA)<L B          "*>>>>>>z***a/***!&V:VVV   O 	 	 	!&b:bbSVWXSYSYbbb   %
+++q0+++G	s>   D!G ''E G  
E**G -E*.AG 
HAHHfilec           	        K   	 ||d<   ||d<   | j         |d<   |                     ||            | j        di | d{V }|S # t          $ r=}t	          j        t          | |t          j                    |                     |d}~ww xY w)a  
        Example Usage:

        ```
        from litellm import Router
        client = Router(model_list = [
            {
                "model_name": "whisper",
                "litellm_params": {
                    "model": "whisper-1",
                },
            },
        ])

        audio_file = open("speech.mp3", "rb")
        transcript = await client.atranscription(
        model="whisper",
        file=audio_file
        )

        ```
        r   r  r  r  Nr  r{   )	_atranscriptionr  r  ry  r  r  r>   r  r  )r  r  r   r  r  r|  s         r}   atranscriptionzRouter.atranscription  s      .	#F7O!F6N*.*>F&'00uV0LLL?T?II&IIIIIIIIHO 		 		 		(,0#)(1(<(>(>'(	     G		   ?A 
B8BBc           	      "  K   |}	 t          j        d| d|            t          |          }|                     |dddg|                    dd           |           d {V }|                     ||           |d	                                         }|                     ||          }| j        |xx         d
z  cc<   t          j
        di i ||| j        |d|}	|                     ||d          }
|
mt          |
t          j                  rS|
4 d {V  	 |                     ||           d {V  |	 d {V }	d d d           d {V  n# 1 d {V swxY w Y   n%|                     ||           d {V  |	 d {V }	| j        |xx         d
z  cc<   t          j        d| d           |	S # t&          $ rF}t          j        d| dt)          |           d           || j        |xx         d
z  cc<   |d }~ww xY w)Nz!Inside _atranscription()- model: r  r  r  r  r  r  r  r   ro   )r  r  r  r  r  r  zlitellm.atranscription(model=r  r  r  r{   )r   rz  r   r   r  r  rw  r  r   r   r  r   r  r   r  r  r	  r   r  ry  r   r   )r  r  r   r  r  r  rs  r  r  r  r  r|  s               r}   r  zRouter._atranscription  s     
B	!'MEMMVMM    AHH#BB#)h??@$*JJ/Dd$K$K%	  C          J //:f/UUU./4466D>>% ?  L
 Z(((A-(((-   #3*	  
  H !,,%3 -  M (Zw0. .( ) . . . . . . . . EE#-@P F          &.~~~~~~H. . . . . . . . . . . . . . . . . . . . . . . . . . . AA)<L B          "*>>>>>>z***a/***!&S
SSS   O 	 	 	!&_
__PSTUPVPV___   %
+++q0+++G	s>   DF> 'EF> 
E""F> %E"&AF> >
HAH		Hinputvoicec           	        K   	 ||d<   ||d<   |                      |dddg|                    dd          |           d{V }|                     ||	           |d
                                         }|d          | j                                        D ]0\  }}||vr|||<   |dk    r||                             |           1|                     ||d          }	|                    dd          }
|
|	|
|	j	        k    rd}n|	}t          j        di i |d|i| d{V }|S # t          $ r=}t          j        t          | |t!          j                    |                     |d}~ww xY w)a  
        Example Usage:

        ```
        from litellm import Router
        client = Router(model_list = [
            {
                "model_name": "tts",
                "litellm_params": {
                    "model": "tts-1",
                },
            },
        ])

        async with client.aspeech(
            model="tts",
            voice="alloy",
            input="the quick brown fox jumped over the lazy dogs",
            api_base=None,
            api_key=None,
            organization=None,
            project=None,
            max_retries=1,
            timeout=600,
            client=None,
            optional_params={},
        ) as response:
            response.stream_to_file(speech_file_path)

        ```
        r  r  r  r  r  r  Nr  r  r   r   r   r4  r  ru  r  r  r{   )r   r  r  rw  r   r  r   r  r'  ru  r   aspeechry  r  r  r>   r  r  )r  r   r  r  r  rs  r  r  r  r  r  r  r  r|  s                 r}   r  zRouter.aspeech  s6     @4	#F7O#F7O#BB#)h??@$*JJ/Dd$K$K%	  C          J 00uV0LLL./4466DMM399;; ( (1VOO !F1II*__1I$$Q'''%)%5%5%f' &6 & &" %jjD99O+*6#'='EEE#5$_  l         H O 		 		 		(,0#)(1(<(>(>'(	     G		s   D&D+ +
E258E--E2c           	      &  K   	 ||d<   t           |d<   | j        |d<   |                     ||            | j        di | d {V }|S # t          $ r=}t          j        t          | |t          j	                    |                     |d }~ww xY wNr   r  r  r  r  r{   )
r  _arerankr  r  ry  r  r  r>   r  r  r  r   r  r  r|  s        r}   arerankzRouter.arerankQ	  s      	#F7O#F7O*.-F&'00uV0LLL?T?II&IIIIIIIIHO 		 		 		(,0#)(1(<(>(>'(	     G		s   AA	 	
B8BBc           	        K   d }	 t          j        d| d|            |                     ||                    dd           |           d {V }|                     ||           |d                                         }|d         }|                     ||          }| j        |xx         dz  cc<   t          j	        di i || j
        |d	| d {V }| j        |xx         dz  cc<   t          j        d
| d           |S # t          $ rF}t          j        d
| dt          |           d           || j        |xx         dz  cc<   |d }~ww xY w)NzInside _rerank()- model: r  r  )r   r  r  r  r   r   ro   )r  r  zlitellm.arerank(model=r  r  r  r{   )r   rz  r   r  r  rw  r  r   r   r  r   r   r  ry  r   r   )	r  r   r  r  rs  r  r  r  r|  s	            r}   r  zRouter._arerankf	  s5     
'	!'EEEEVEE    $BB$*JJ/Dd$K$K%  C          J
 //:f/UUU./4466DgJ>>% ?  L Z(((A-((($_  #3*   	       H z***a/***!&LLLL   O 	 	 	!&XXXQXXX   %
+++q0+++G	s   DD 
EAEEis_retryis_fallbackis_asyncc                 <   d|dg}	 ||d<   ||d<   |                     d| j                  |d<   |                    di                               d|i           |                     |||                    dd           	          }|d
                                         }	| j                                        D ]0\  }
}|
|vr|||
<   |
dk    r||
                             |           1t          j
        di i |	|| j        d|S # t          $ r}|d }~ww xY w)Nr  r  r   r  r   r   r  r  r  r   )r  r  r{   )r'  r   r   r   r  r  rw  r   r  r   text_completionr   ry  )r  r   r  r  r  r  r  r~  rs  r  r  r  r|  s                r}   r  zRouter.text_completion	  so    $778	#F7O%F8$*JJ}d>N$O$OF=!j"--44mU5KLLL 66!$*JJ/Dd$K$K 7  J ./4466D399;; ( (1VOO !F1II*__1I$$Q''' *ss-r-rSWSg-r-r-rkq-rsss 	 	 	G	s   DD
 

DDDc           	        K   |                     dd           :|                     ||                    d          | j        ||f|           d {V S 	 ||d<   ||d<   | j        |d<   |                     ||            | j        di | d {V }|S # t          $ r=}t          j	        t          | |t          j                    |                     |d }~ww xY w)	Nr  )r   r  r  r  r  r   r  r  r  r  r{   )r'  r  r  atext_completion_atext_completionr  r  ry  r  r  r>   r  r  )	r  r   r  r  r  r  r  r  r|  s	            r}   r  zRouter.atext_completion	  sI      ::j$''3//J//"&"7V_ 0         	#F7O%F8*.*@F&'00uV0LLL?T?II&IIIIIIIIHO 		 		 		(,0#)(1(<(>(>'(	     G		s   ?B 
C8CCc           	      .  K   	 t          j        d| d|            t          |          }|                     |d|dg|                    dd           |           d {V }|                     ||           |d                                         }|d	         }|                     ||          }| j        |xx         d
z  cc<   t          j
        di i ||| j        |d|}	|                     ||d          }
|
mt          |
t          j                  rS|
4 d {V  	 |                     ||           d {V  |	 d {V }	d d d           d {V  n# 1 d {V swxY w Y   n%|                     ||           d {V  |	 d {V }	| j        |xx         d
z  cc<   t          j        d| d           |	S # t&          $ rF}t          j        d| dt)          |           d           || j        |xx         d
z  cc<   |d }~ww xY w)N#Inside _atext_completion()- model: r  r  r  r  r  r  r   r   ro   r  r  r  r  zlitellm.atext_completion(model=r  r  r  r{   )r   rz  r   r   r  r  rw  r  r   r   r  r   r  r   r  r  r	  r   r  ry  r   r   )r  r   r  r  r  rs  r  r  r  r  r  r|  s               r}   r  zRouter._atext_completion	  s     C	!'OeOOvOO    AHH#BB#)f==>$*JJ/Dd$K$K%	  C          J //:f/UUU./4466DgJ>>% ?  L Z(((A-(((/  $#3*	  
  H !,,%3 -  M (Zw0. .( ) . . . . . . . . EE#-@P F          &.~~~~~~H. . . . . . . . . . . . . . . . . . . . . . . . . . . AA)<L B          "*>>>>>>z***a/***!&U*UUU   O 	 	 	!&\%\\SQRVV\\\    &&&!+&&&G	>   D!G %'EG 
E((G +E(,AG 
HAHH
adapter_idc           	        K   	 ||d<   ||d<   | j         |d<   |                    d| j                  |d<   |                    di                               d|i            | j        di | d {V }|S # t          $ r=}t          j        t          | |t          j                    |                     |d }~ww xY w)	Nr   r  r  r   r   r  r  r{   )_aadapter_completionr'  r   r   r   r  ry  r  r  r>   r  r  )	r  r  r   r  r  r  r  r  r|  s	            r}   aadapter_completionzRouter.aadapter_completion!
  s     	#F7O#-F< *.*CF&'$*JJ}d>N$O$OF=!j"--44mU5KLLL?T?II&IIIIIIIIHO 		 		 		(,0#)(1(<(>(>'(	     G		s   A1A6 6
B= 8B88B=c           	      .  K   	 t          j        d| d|            t          |          }|                     |dddg|                    dd           |           d {V }|                     ||           |d	                                         }|d
         }|                     ||          }| j        |xx         dz  cc<   t          j
        di i ||| j        |d|}	|                     ||d          }
|
mt          |
t          j                  rS|
4 d {V  	 |                     ||           d {V  |	 d {V }	d d d           d {V  n# 1 d {V swxY w Y   n%|                     ||           d {V  |	 d {V }	| j        |xx         dz  cc<   t          j        d| d           |	S # t&          $ rF}t          j        d| dt)          |           d           || j        |xx         dz  cc<   |d }~ww xY w)Nz&Inside _aadapter_completion()- model: r  r  zdefault textr  r  r  r  r   r   ro   )r  r  r  r  r  r  z"litellm.aadapter_completion(model=r  r  r  r{   )r   rz  r   r   r  r  rw  r  r   r   r  r   r  r   r  r  r	  r   r  ry  r   r   )r  r  r   r  r  rs  r  r  r  r  r  r|  s               r}   r  zRouter._aadapter_completion>
  s     C	!'RRR&RR    AHH#BB#)nEEF$*JJ/Dd$K$K%	  C          J //:f/UUU./4466DgJ>>% ?  L Z(((A-(((2  ",#3*	  
  H !,,%3 -  M (Zw0. .( ) . . . . . . . . EE#-@P F          &.~~~~~~H. . . . . . . . . . . . . . . . . . . . . . . . . . . AA)<L B          "*>>>>>>z***a/***!&XZXXX   O 	 	 	!&_U__PSTUPVPV___    &&&!+&&&G	r  c           	      R  K   	 ||d<   ||d<   | j         |d<   |                     ||d           t          j        d| d|             | j        d
i | d{V }|S # t
          $ r=}t          j        t          | |t          j
                    |	                     |d}~ww xY w)
        Helper function to make a generic LLM API call through the router, this allows you to use retries/fallbacks with litellm router
        r   original_generic_functionr  litellm_metadatar   r  r  z3Inside ageneric_api_call_with_fallbacks() - model: r  Nr  r{   )(_ageneric_api_call_with_fallbacks_helperr  r   rz  r  ry  r  r  r>   r  r  )r  r   r  r  r  r|  s         r}   !_ageneric_api_call_with_fallbacksz(Router._ageneric_api_call_with_fallbacks
  s     	#F7O2CF./*.*WF&'00FCU 1    "'_e__W]__   @T?II&IIIIIIIIHO  		 		 		(,0#)(1(<(>(>'(	     G		s   AA 
B&)8B!!B&r  c           	      b  K   |                     dd          }d}	 t          |          }	 |                     |||                    dd          |                     dd                     d{V }n+# t          $ r}|r |dd|i| d{V cY d}~S |d}~ww xY w|                     |||	           |d
                                         }	|	d         }
| j        |
xx         dz  cc<    |di i |	d| j        i|}| 	                    ||d          }|mt          |t          j                  rS|4 d{V  	 |                     ||           d{V  | d{V }ddd          d{V  n# 1 d{V swxY w Y   n%|                     ||           d{V  | d{V }| j        |
xx         dz  cc<   t          j        d|
 d           |S # t          $ rF}t          j        d| dt#          |           d           || j        |xx         dz  cc<   |d}~ww xY w)r  passthrough_on_no_deploymentFr  r~  Nr  )r   r  r~  r  r   r-  r   ro   r  r  r  r  z'ageneric_api_call_with_fallbacks(model=r  r  r  r{   )r  r   r   r'  ry  r  rw  r   r   r  r   r  r  r	  r   r   r  r   r   )r  r   r  r  r  r  r  rs  r|  r  r  r  r  s                r}   r  z/Router._ageneric_api_call_with_fallbacks_helper
  s1      (.zz2PRW'X'X$;C	@HH
#'#F#F#)#ZZ
D99(.

3H$(O(O	 $G $ $      

    / R!:!:!Q!Q!Q&!Q!QQQQQQQQQQQQQ
 //%fM 0    ./4466DgJZ(((A-((( 10  t3   H !,,%3 -  M (Zw0. .( ) . . . . . . . . EE#-@P F          &.~~~~~~H. . . . . . . . . . . . . . . . . . . . . . . . . . . AA)<L B          "*>>>>>>z***a/***!&]*]]]   O 	 	 	!&d%ddUXYZU[U[ddd    &&&!+&&&G	st   G AA4 3G 4
B>BBG BBB#G ?'E8&G 8
FG FAG 
H.(AH))H.c           
      ,   |j         }	 t          j        d| d| d|            |                     ||                    dd          |                    dd                    }|                     ||d	           |d
                                         }|d         }| j        |xx         dz  cc<   | 	                    |           	 t          |d                   \  }}	}}n# t          $ r d}	Y nw xY w |di i ||	| j        d|}
| j        |xx         dz  cc<   t          j        | d| d           |
S # t          $ rH}t          j        | d| dt          |           d           || j        |xx         dz  cc<   |d}~ww xY w)a  
        Make a generic LLM API call through the router, this allows you to use retries/fallbacks with litellm router
        Args:
            model: The model to use
            original_function: The handler function to call (e.g., litellm.completion)
            **kwargs: Additional arguments to pass to the handler function
        Returns:
            The response from the handler function
        z&Inside _generic_api_call() - handler: z	, model: r  r~  Nr  r  generic_api_callr-  r   r   ro   r  r  )r  r  z(model=r  r  r  r{   )rw   r   rz  r  r'  r  r  rw  r   r  rj   ry  r   r   r  r   r   )r  r   r  r  handler_namers  r  r  rN  r  r  r|  s               r}    _generic_api_call_with_fallbacksz'Router._generic_api_call_with_fallbacks
  sq    )1.	!'iiiPUiiagii   66J55$*JJ/Dd$K$K 7  J
 //%fDV 0    ./4466DgJZ(((A-((( 11Z1HHH+/?d7m/T/T/T,&11 + + +&*###+ )(  +>#3   	 H z***a/***!&KK
KKK   O 	 	 	!&RRRR3q66RRR    &&&!+&&&G	s>   CE C( 'E (C74E 6C77A	E 
FAFFc                    	 ||d<   ||d<   | j         |d<   |                    d| j                  |d<   |                    di                               d|i            | j        di |}|S # t          $ r}|d }~ww xY w)Nr   r  r  r   r   r  r{   )
_embeddingr'  r   r   r   r  ry  r  r   r  r  r  r  r|  s          r}   	embeddingzRouter.embedding.  s    		#F7O#F7O*./F&'$*JJ}d>N$O$OF=!j"--44mU5KLLL3t3==f==HO 	 	 	G	r  c           	      $   d }	 t          j        d| d|            |                     |||                    dd                     }|                     ||           |d                                         }|d         }|                     ||d	          }|                    d
d           }||||j        k    rd }	n|}	| j	        |xx         dz  cc<   | 
                    |           t          j        di i ||| j        |	d|}
| j        |xx         dz  cc<   t          j        d| d           |
S # t           $ rF}t          j        d| dt#          |           d           || j        |xx         dz  cc<   |d }~ww xY w)NzInside embedding()- model: r  r  )r   r  r  r  r   r   syncr  ru  ro   r  r  r  r  zlitellm.embedding(model=r  r  r  r{   )r   rz  r  r  r  rw  r  r'  ru  r   r  r   r  r   r   r  ry  r   r   )r  r  r   r  r  rs  r  r  r  r  r  r|  s               r}   r  zRouter._embedding@  sM   
4	!'GeGGvGG   66$*JJ/Dd$K$K 7  J
 //:f/UUU./4466DgJ%)%5%5%f& &6 & &" %jjD99O+*6#'='EEE#5Z(((A-((( 11Z1HHH(  "#3*	  
  H z***a/***!&N:NNN   O 	 	 	!&Z:ZZ3q66ZZZ   %
+++q0+++G	s   D:D? ?
F	AF

Fc           	        K   	 ||d<   ||d<   | j         |d<   |                     ||            | j        di | d {V }|S # t          $ r=}t	          j        t          | |t          j                    |                     |d }~ww xY wr  )	_aembeddingr  r  ry  r  r  r>   r  r  r  s          r}   
aembeddingzRouter.aembeddingx  s      	#F7O#F7O*.*:F&'00uV0LLL?T?II&IIIIIIIIHO 		 		 		(,0#)(1(<(>(>'(	     G		r  c           	      *  K   d }	 t          j        d| d|            t          |          }|                     |||                    dd           |           d {V }|                     ||           |d                                         }|d         }|                     ||          }| j        |xx         dz  cc<   t          j
        di i ||| j        |d	|}	|                     ||d
          }
|
mt          |
t          j                  rS|
4 d {V  	 |                     ||           d {V  |	 d {V }	d d d           d {V  n# 1 d {V swxY w Y   n%|                     ||           d {V  |	 d {V }	| j        |xx         dz  cc<   t          j        d| d           |	S # t&          $ rF}t          j        d| dt)          |           d           || j        |xx         dz  cc<   |d }~ww xY w)NzInside _aembedding()- model: r  r  )r   r  r  r  r  r   r   ro   r  r  r  r  zlitellm.aembedding(model=r  r  r  r{   )r   rz  r   r   r  r  rw  r  r   r   r  r   r  r   r  r  r	  r   r  ry  r   r   )r  r  r   r  r  r  rs  r  r  r  r  r|  s               r}   r  zRouter._aembedding  s     
B	!'IIIII    AHH#BB$*JJ/Dd$K$K%	  C          J //:f/UUU./4466DgJ>>% ?  L
 Z(((A-((()  "#3*	  
  H !,,%3 -  M (Zw0. .( ) . . . . . . . . EE#-@P F          &.~~~~~~H. . . . . . . . . . . . . . . . . . . . . . . . . . . AA)<L B          "*>>>>>>z***a/***!&OJOOO   O 	 	 	!&[J[[CPQFF[[[   %
+++q0+++G	s>   DG #'E
G 
E&&G )E&*AG 
HAHHc           	      N  K   	 ||d<   | j         |d<   |                    d| j                  |d<   |                     ||            | j        di | d {V }|S # t
          $ r=}t          j        t          | |t          j
                    |                     |d }~ww xY w)Nr   r  r   r  r  r{   )_acreate_filer'  r   r  r  ry  r  r  r>   r  r  r  s        r}   acreate_filezRouter.acreate_file  s      
	#F7O*.*<F&'$*JJ}d>N$O$OF=!00uV0LLL?T?II&IIIIIIIIHO 		 		 		(,0#)(1(<(>(>'(	     G		s   AA 
B$'8BB$c                 :   K   	 ddl m} t          j        d| d            t	                                         |dddg                    dd           	           d {V }d
t          dt          f fd}g }t          |t                    r|
                     ||                     n#|D ] }|
                     ||                     !t          j        |  d {V }t          |          dk    rt          d           |||          }	t          t          |d                   }
|	|
j        d<   |
S # t          $ rI}t          j        d| d dt%          |           d           | j        |xx         dz  cc<   |d }~ww xY w)Nr   )add_model_file_id_mappingsr  r  r  files-api-fake-textr  r  r   r~  r  r  r  rs  r   c           	      d  K   ddl m}  |          }                    | |d           | d                                         }|d         }                    | |          }j        |xx         dz  cc<   t          |d         	          \  }}}}t          t          t                   
                    d
                    }	t          t          t                   
                    d                    }
|
r|	st          d          t          |	          }|rt          |
|          }
|
|d<   t          j        di i ||j        |d|}                    | |d          }|mt'          |t(          j                  rS|4 d {V  	                     |            d {V  | d {V }d d d           d {V  n# 1 d {V swxY w Y   n%                    |            d {V  | d {V }j        |xx         dz  cc<   t1          j        d| d           |S )Nr   safe_deep_copyr  r-  r   r   r  ro   r  purposer  z2file and file_purpose are required for create_file)r   )file_contentnew_model_namer  r  r  r  r  r  litellm.acreate_file(model=r  r{   )'litellm.litellm_core_utils.core_helpersr  r  rw  r  r   rj   r   r   rH   r'  rF   ry  r/   r.   r   r  r   r  r   r  r  r	  r   r   r  )rs  r  kwargs_copyr  r  r  stripped_modelr  rN  r   r  replace_model_in_jsonl_boolr  r  r  r  r  s                 r}   create_file_for_deploymentz8Router._acreate_file.<locals>.create_file_for_deployment  s     RRRRRR,nV4433)&"0 4   
 ""2388::!']
#BB)&  C      ,,,1,,, =Mw-= = =9 3Q
 x(:;VZZ	=R=RSSHY/F1C1CDD 7 #L   /L#/ / /+ / /1%)'5  D
 +/K'"/  /B#'#7".	  
 &  !% 0 0)& 7 !1 ! ! !,!7#42 2,  - 2 2 2 2 2 2 2 2 #II'1DT J          *2>>>>>>2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 EE#-@P F          &.~~~~~~H":...!3...%*U*UUU    s   'G
GGNo healthy deployments found.)r   rL  model_file_id_mappingr  , r  r  ro   )!litellm.router_utils.common_utilsr  r   rz  r   async_get_healthy_deploymentsr  r  rG   r   r   r  rQ  rk  ry  r   rv  	exceptionr   r   )r  r   r  r  r   r	  tasksrs  rL  r  returned_responser|  r  s   ` `         @r}   r  zRouter._acreate_file  s     
}	TTTTTT!'OeOOvOO    AHH(,(J(J#)6KLLM$*JJ/Dd$K$K%!1 )K ) ) # # # # # #Q T Q FV Q  Q  Q  Q  Q  Q  Q  Q f E-t44 I778KLLMMMM"5 I IJLL!;!;J!G!GHHHH%ne4444444I9~~"" ?@@@$>$>$79% % %! !%%5y| D D & ,' %$ 	 	 	!+bebbvbbSVWXSYSYbbb    &&&!+&&&G	s   D?E 
FAFFc           	      p  K   	 ||d<   | j         |d<   |                    d| j                  |d<   t          d          }|                     |||            | j        di | d {V }|S # t          $ r=}t          j        t          | |t          j                    |                     |d }~ww xY w)	Nr   r  r   _acreate_batchr!  r  r  r{   )r  r'  r   r-   r  r  ry  r  r  r>   r  r  )r  r   r  r  r  r|  s         r}   acreate_batchzRouter.acreate_batcht  s     
	#F7O*.*=F&'$*JJ}d>N$O$OF=!%G.& & &" 00'= 1   
 @T?II&IIIIIIIIHO 		 		 		(,0#)(1(<(>(>'(	     G		s   A)A. .
B588B00B5c                 l  K   	 t          j        d| d|            t          |          }|                     |dddg|                    dd           |           d {V }|d                                         }|d	         }|                     ||d
           |                     ||          }| j        |xx         dz  cc<   t          |d	                   \  }}	}}t          j        di i ||	| j        |d|}
|                     ||d          }|mt          |t          j                  rS|4 d {V  	 |                     ||           d {V  |
 d {V }
d d d           d {V  n# 1 d {V swxY w Y   n%|                     ||           d {V  |
 d {V }
| j        |xx         dz  cc<   t          j        d| d           |
S # t(          $ rI}t          j        d| d| dt-          |           d           || j        |xx         dz  cc<   |d }~ww xY w)Nz Inside _acreate_batch()- model: r  r  r  r  r  r  r   r   r  r-  r  ro   r  r  r  r  r  zlitellm.acreate_batch(model=r  zlitellm._acreate_batch(model=r  r  r  r{   )r   rz  r   r   r  rw  r  r  r   rj   r   r  r   r  r   r  r  r	  r   r  ry  r  r   r   )r  r   r  r  rs  r  r  r  rN  r  r  r  r|  s                r}   r  zRouter._acreate_batch  s     
I	!'L5LLFLL    AHH#BB#)6KLLM$*JJ/Dd$K$K%	  C          J ./4466DgJ//%fDT 0     >>% ?  L Z(((A-((( ,<$w-+P+P+P(A"Aq,  +>#3*	  
  H !,,%3 -  M (Zw0. .( ) . . . . . . . . EE#-@P F          &.~~~~~~H. . . . . . . . . . . . . . . . . . . . . . . . . . . AA)<L B          "*>>>>>>z***a/***!&RzRRR   O 	 	 	!+dddddUXYZU[U[ddd    &&&!+&&&G	s>   D=G  'E:(G  :
FG  FAG   
H3*AH..H3c           	         	K   	 t                    }|9                     |dddg                    dd          |           d{V }n                                 }|t	          d          g dt
          f fd		|Lt          |t                    r7t          |          d
k    r$t          j
        	fd|D             ddi d{V }nK|:t          |t                    r% 	t          t
          |                     d{V }nt	          d          |Jt          |t                    r|S t          |t                    r|D ]}t          |t                    r|c S rd
         t	          d                                        # t          $ r=}t          j        t!           t#          j                    |                     |d}~ww xY w)z
        Iterate through all models in a model group to check for batch

        Future Improvement - cache the result.
        Nr  zretrieve-api-fake-textr  r  r  Router not yet initialized.r  c                   K   	 ddl m} | d                             d          }| d                                         }|                    d          }|t	          d|            |st          |          \  }}}} |	          }                    t          t          |           |d	           |	                    dd            |	                    dd            t          j        d
i i |d|i| d {V S # t          $ r3}dd l} |j                     
                    |           Y d }~d S d }~ww xY w)Nr   r  r   r   r  z2Model not found in litellm_params for deployment: r  aretrieve_batchr-  r{   )r  r  r'  rw  ry  rj   r  r   r  r  r   r  r  	print_excr   )r  r  r   r  r  rN  
new_kwargsr|  r  r  receieved_exceptionsr  s            r}   try_retrieve_batchz2Router.aretrieve_batch.<locals>.try_retrieve_batch   s     $ VVVVVV&'78<<WEEE%&67<<>>D*.((3H*I*I'}']Q[]]  
 / 7G"'8 8 84.1 "0!7!7J77#'j#9#9)&7 8   
 NN#8$???HH2D999!(!8 " ""13F  )" "        !      $$$$'I')))(//22244444 s   C;D 
D>(D99D>r   c                 L    g | ] } t          t          |                    !S r{   )r   rR   rJ  r   r  s     r}   rO  z*Router.aretrieve_batch.<locals>.<listcomp>.  s?       ! +*40CU+K+KLL  r|   return_exceptionsTr
  z7Unable to find batch in any model. Received errors - {}r  )r   r  r  r  ry  rR   r   r   rk  r  rQ  r  r   r`   r  r  r>   r  r  )
r  r   r  r  filtered_model_listresultsrf  r|  r  r  s
   ` `     @@r}   r  zRouter.aretrieve_batch  s     l	@HH  <<'-:RSST(.

3H$(O(O#)%5 =         $# '+&9&9&;&;#"* =>>>#% % 5H %  %  %  %  %  %  %  % R $/2D99 0+,,q00 '   %8  !
 '+! !       %0Z#T6 60 !3 2,.ABB! !         ?@@@ "g|44 *"N.. *") * *%fl;; *#)MMM* $ .*1-- IPP(   
  		 		 		(,0#)(1(<(>(>'(	     G		s$   D1F :1F ,-F 
G #8GG c                   K   |                      |          }|t          d          dt          ffdt          j        fd|D               d{V }dg dddd	}|D ]z}|v|d
         #t          |d
          rt          |d
          |d
<   t          |d          |d<   |d                             |j                   t          |dd          du rd|d<   {|S )zQ
        Return all the batches across all deployments of a model group.
        r  Nr  r   c                 n   K   	 t          j        di i | d          d {V S # t          $ r Y d S w xY w)Nr   r{   )r   alist_batchesry  r  s    r}   r  z0Router.alist_batches.<locals>.try_retrieve_batchh  s      $2  ;/0;F;            tts    & 
44c                 &    g | ]} |          S r{   r{   r  s     r}   rO  z(Router.alist_batches.<locals>.<listcomp>s  s%    IIIE  ''IIIr|   r   F)objectr  first_idlast_idhas_morer(  r)  r  r*  T)	r  ry  rR   r  rQ  r  r  extendr  )r  r   r  r!  r"  final_resultsrf  r  s     `    @r}   r%  zRouter.alist_batches[  s[      #11U1CC&9:::	,? 	 	 	 	 	 	  IIII5HIII
 
 
 
 
 
 

 
 
  
	5 
	5F! ,49T9T407
0K0KM*-+269+E+Ei(f%,,V[999 6:u55==04M*-r|   r  c                   K   |                      |                    dd          |           |                    d          r|                     |d                   rz|                     |d         |           d {V }|d         d         |d<   |d                                         }|                     ||           |                    |            |di | d {V S )	Nr   r  r  r  )r   r  r   r  r{   )r  r'  r  r   rw  r  r   )r  r  r  r  rs  r  s         r}   )_pass_through_moderation_endpoint_factoryz0Router._pass_through_moderation_endpoint_factory  s;      	,,**Wb)) 	- 	
 	
 	
 ::g 	 4#6#6&/#6#R#R 	 #BBWo%  C          J ))9:7CF7O./4466D//% 0    MM$&&00000000000r|   
assistantsrE  )r/  rD  rF  rI  rN  rL  rM  rO  rJ  rK  rQ  rR  rS  rT  rU  rV  rW  rX  rP  rG  rb  ra  rc  r]  r^  r_  r`  c                      dv r0	 	 ddt           t                   dt           t                   f fd}|S 	 	 ddt           t                   dt           t                   f fd}|S )z
        Creates appropriate wrapper functions for different API call types.

        Returns:
            - A synchronous function for synchronous call types
            - An asynchronous function for asynchronous call types
        )rL  rb  rc  r_  r`  Nr  r  c                 "     j         ddi|S )Nr  r{   )r  )r  r  r  r  r  s      r}   sync_wrapperz-Router.factory_function.<locals>.sync_wrapper  s1    
 =t<  &7;A  r|   c                   K   dk    r j         d| |d| d {V S dk    r j        ddi| d {V S dv r j        ddi| d {V S dk    r j        ddd| d {V S d	v r j        ddi| d {V S d
v r j        d| d| d {V S dv r j        d| |d| d {V S d S )Nr/  )r  r  r  rD  r  )rF  rI  rQ  rR  rS  rT  rU  rV  rW  rG  ra  rX  T)r  r  )rM  rN  rO  rP  )r]  r^  )r  r  )rJ  rK  r{   ))_pass_through_assistants_endpoint_factoryr.  r  _init_responses_api_endpoints _init_vector_store_api_endpoints)r  r  r  rE  r  r  s      r}   async_wrapperz.Router.factory_function.<locals>.async_wrapper  s     
 L((KTK &7(;!  	         l**KTK  &7;A             DTC  &7         666CTC &715          
     @T?  &7             CTB &7(;          
 ???CTC &7(;!  	         @?r|   NN)r   r   r   )r  r  rE  r2  r7  s   ```  r}   r9  zRouter.factory_function  s    R  
 
 
 6:(, %-c]          26$(@	 @	!)#@	SM@	 @	 @	 @	 @	 @	 @	 @	D r|   c                 8   K   |r	d|vr||d<    |di | d{V S )zJ
        Initialize the Vector Store API endpoints on the router.
        r  Nr{   r{   )r  r  r  r  s       r}   r6  z'Router._init_vector_store_api_endpoints)  sP        	@#8#F#F,?F()&&00000000000r|   c                    K   ddl m} |                    |                    d                    }|||d<    | j        dd|i| d{V S )z
        Initialize the Responses API endpoints on the router.

        GET, DELETE, CANCEL Responses API Requests encode the model_id in the response_id, this function decodes the response_id and sets the model to the model_id.
        r   )ResponsesAPIRequestUtilsresponse_idNr   r  r{   )litellm.responses.utilsr;  get_model_id_from_response_idr'  r  )r  r  r  r;  model_ids        r}   r5  z$Router._init_responses_api_endpoints6  s       	EDDDDD+IIJJ}%%
 
 &F7O;T; 
 
/

 
 
 
 
 
 
 
 	
r|   r  c                    K   |D| j         .| j         d         }|                    | j         d                    nt          d           |d||d| d{V S )z@Internal helper function to pass through the assistants endpointNr  r   z'custom_llm_provider' must be set. Either via:
 `Router(assistants_config={'custom_llm_provider': ..})` 
or
 `router.arun_thread(custom_llm_provider=..)`)r  r  r{   )r   r   ry  )r  r  r  r  r  s        r}   r4  z0Router._pass_through_assistants_endpoint_factoryL  s       &%1&*&<=R&S#d45EFGGGG s   '& 
 3F
 
FL
 
 
 
 
 
 
 
 	
r|   r|  r  r  c	                   K   t          j        dt          j                                |}	d}
|                    d          }d}|du s||| |	d|}d|vr
| j        |d<   d|vrd	|d<   	 t          j        d
           t          |          }|r,|                    ||d           t          |i | d{V }|S t          |t          j                  r|G|                     ||          }||	|                    ||d           t          |i | d{V }|S d                    |||          }t          j        d                    |                     |xj        d                    |          z  c_        nt          |t          j                  r|G|                     ||          }||	|                    ||d           t          |i | d{V }|S d                    |||          }t          j        d                    |                     |xj        d                    |          z  c_        ||t          j        d|            t#          |t%          t&          |                    \  }
}|
|||         d         }
|
Bt          j        d| d|            t)          |	d          r|	xj        d| d| z  c_        |	|                    |
|d           t          |i | d{V }|S n# t*          $ r}t-          |          }t          j        d                    t'          |          t          j                    t1          | |           d{V                      t'          |          }Y d}~nd}~ww xY wt)          |	d          rZ|	xj        d                    ||
          z  c_        t3          |          d	k    r#|	xj        d                    |          z  c_        |	)zD
        Common utilities for async_function_with_fallbacks
        	TracebackNr   r  T)litellm_routerr  r   fallback_depthr   zTrying to fallback b/w models)r   )fallback_model_grouporiginal_model_group)r   r  zmodel={}. context_window_fallbacks={}. fallbacks={}.

Set 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbackszGot 'ContextWindowExceededError'. No context_window_fallback set. Defaulting                         to fallbacks, if available.{})msgz
{}zmodel={}. content_policy_fallback={}. fallbacks={}.

Set 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbackszGot 'ContentPolicyViolationError'. No content_policy_fallback set. Defaulting                         to fallbacks, if available.{}zinside model fallbacks: r   z7No fallback model group found for original model_group=z. Fallbacks=r  zlitellm.router.py::async_function_with_fallbacks() - Error occurred while trying to do fallbacks - {}
{}

Debug Information:
Cooldown Deployments={}r  r  z<. Received Model Group={}
Available Model Group Fallbacks={}z
Error doing the fallback: {})r   rz  r  r  r'  r   r  r9   r   r;   r   r   ContextWindowExceededError(_get_fallback_model_group_from_fallbacksr  r  r  r:   r   r   r  ry  r   r  r6   rk  )r  r|  r  r   r   r   r  r  r  r  rE  rF  fallback_failure_exception_strr  is_non_standard_fallback_formatr  #context_window_fallback_model_grouperror_message#content_policy_fallback_model_groupgeneric_fallback_idxnew_exceptionr  s                         r}   r  z1Router.async_function_with_fallbacks_common_utilsb  sy      	#$H	0D0F0F$H$HIII#.4jj.A.A)+&$$(<(DG #"4
 
 
 ,..,0,>L)<//-.L)*L	@!&'FGGG /R#/ / /+ /  ##090D    "4""" "      
  !W?@@ F>+7 EE":$/ F   8 ;B00 ''4W4H    &8&&& &            H $O %m  %t  %t#%=y% %M *.77=v)8 8    II}!=!==IIIAwBCC ">+7 EE":$/ F   8 ;B00 ''4W4H    &8&&& &            H $O %l  %s  %s#%=y% %M *.77=v)8 8    II}!=!==II$)@%+,Ry,R,RSSS -' $S+ 6 6  (( (/4H4T+45I+J3+O('/).vR]vvktvv   19== U*22  7Up{  7U  7U  JS  7U  7U  U22,,##0D0D    "4""" "      
   	@ 	@ 	@@HH!' k  r  r&&(**I04)9         	 	 	 .1-?-?******	@ %y11 	&&*i*p*p$+ + && 122Q66"**4;;6 ** ! s/   -AL ?A"L "CL (D%L 
NA:NNc                   K   |                     d          }|                    dd          }|                     d| j                  }|                     d| j                  }|                     d| j                  }|                    dd          }	 |                     |||||	           | | j        |i |d|i d{V }	n | j        |i | d{V }	t          j        d
|	            t          |	d          }	|	S # t          $ r,}
|                     |
|||||||           d{V cY d}
~
S d}
~
ww xY w)z
        Try calling the function_with_retries
        If it fails after num_retries, fall back to another model group
        r   r  Fr   r   r   mock_timeoutN)r  r  r   r   r   zAsync Response: r   )r  attempted_fallbacks)r'  r  r   r   r   _handle_mock_testing_fallbacksasync_function_with_retriesr   rz  r+   ry  r  )r  r  r  r  r  r   r   r   rS  r  r|  s              r}   r  z$Router.async_function_with_fallbacks!  s	      &,ZZ%8%8,2JJ7JE,R,R$*JJ{DN$K$K	39::&(E4
 4
  4:::&(E4
 4
  zz.$77	//'#)A)A 0    '!A!A"#" "2>" " "       "B!A4!R6!R!RRRRRRR!'(E8(E(EFFF7!$%  H O 
	 
	 
	HH!((	 	 	 	 	 	 	 	 	 	 	 	 	 	
	s   A0D 
D=!D82D=8D=c                 @   t          j        |          }|j        &|j        du rt          j        |dd| d|           |j        &|j        du rt          j        |dd| d|           |j        &|j        du rt          j        |dd| d|           dS dS )	a  
        Helper function to raise a litellm Error for mock testing purposes.

        Raises:
            litellm.InternalServerError: when `mock_testing_fallbacks=True` passed in request params
            litellm.ContextWindowExceededError: when `mock_testing_context_fallbacks=True` passed in request params
            litellm.ContentPolicyViolationError: when `mock_testing_content_policy_fallbacks=True` passed in request params
        NTr  #This is a mock exception for model=z#, to trigger a fallback. Fallbacks=)r   r  r  zF, to trigger a fallback.                     Context_Window_Fallbacks=zF, to trigger a fallback.                     Context_Policy_Fallbacks=)	rT   from_kwargsmock_testing_fallbacksr   InternalServerErrormock_testing_context_fallbacksrI  %mock_testing_content_policy_fallbacksr  )r  r  r  r   r   r   mock_testing_paramss          r}   rU  z%Router._handle_mock_testing_fallbacksT  s'     6A&II6B#:dBB-!ykyynwyy     >J#BdJJ4!Ik I I.FI I     EQ#ITQQ5!Ik I I.FI I    RQQQr|   c           
      4  K   t          j        d           |                    d          }|                    d| j                  }t	          |          }|                    d| j                  }|                    d| j                  }|                    d          }|                    d          }	|                    d|                    d	                    pi }
d
|
v r]t          |
d
         t                    rB| 
                    |
d
                   }|$|
                    dt          |          i           t          j        d| d|	            	 |                     ||            | j        |g|R i | d {V }t          |dd           }|S # t           $ rm}d }|}t#          |dd           }|t          |t$                    r|}		 |                     |                    d          pd|           d {V \  }}|                     ||||||           | j        | j        .|                     ||                    d                    }||}	|	dk    r|                     ||          }n t          j        d|	            |                     ||	|	||          }t5          j        |           d {V  t9          |	          D ]}	  | j        |g|R i | d {V }t;          j        |          r| d {V }t          ||dz   |	          }|c cY d }~S # t           $ r}|                     ||          }|	|z
  }|                    d          }|!|                     ||           d {V \  }}ng }|                     |||	||          }t5          j        |           d {V  Y d }~d }~ww xY wt?          |          t@          j!        v r"tE          |d|	           tE          |d|           |d }~ww xY w)Nz#Inside async function with retries.r  r   r   r   r   r   r  r   r  r  model_group_sizez/async function w/ retries: original_function - z, num_retries - )r  r  r   )r  attempted_retriesr   r  r  )r  r   all_deploymentsr   regular_fallbacksr   )r  r  )r  r|  z#Retrying request with num_retries: )r|  remaining_retriesr   r   rb  ro   r   )#r   rz  r  r   r   r   r   r'  r   r   r  r   rk  %_handle_mock_testing_rate_limit_error	make_callr,   ry  r  r   r  should_retry_this_errorr   r   r<   	log_retry_time_to_sleep_before_retryr  r  rS  r   is_async_callabler   r   LITELLM_EXCEPTION_TYPESr  )r  r  r  r  r   r  r   r   r  r   	_metadatar   r  r|  current_attemptr  deployment_num_retriesr  _all_deployments_retry_policy_retriesr   rd  _modelrN  _timeouts                            r}   rV  z"Router.async_function_with_retries  s     #$IJJJ"JJ':;;JJ{DN;;	<VDD#)::&(E$
 $
  $*::&(E$
 $
  &,ZZ%8%8jj// !**%7J9O9OPPVTV	I%%*Y}5Ms*S*S%,,	-8P,QQJ%  "4c*oo!FGGG#n>Onnalnn	
 	
 	
r	%66' 7    ,T^,=OOOOOOOOOOOOH4!QD  H O h	% h	% h	%"O!"%,Qt%D%D"%1j&7 71 5 99jj))/R!1 :        $  (($8 0)A"+)A )    !-0< )-(N(N0fjj>Q>Q )O ) )% )4"7KQv9KLL!'CkCC   ::$"-'$8 0 ;  K -,,,,,,,,,#(#5#5 &2 &2%2%3T^4E%W%W%W%WPV%W%WWWWWWWH(:   2 *2>>>>>><!)*9A*=$/     H
 $OOOOOOOO  2 2 2!^^6Q^??F(3o(E%,2JJw,?,?F) #'"E"E"(-= #F # #      0A 02,#??,*;$/,@(8  @    H "-11111111111111-20 &''7+JJJ*M;GGG*M?KKK$$Qh	%sM   AF P*D6P!A	L2*P,P2
O<B
OPOAPPc                    K   |                     d          } ||i |}t          j        |          st          j        |          r| d{V }|                     ||           d{V }|S )z^
        Handler for making a call to the .completion()/.embeddings()/etc. functions.
        r   N)r  r  )r'  r   rj  inspectisawaitableset_response_headers)r  r  r  r  r  r  s         r}   rf  zRouter.make_call  s       jj))$$d5f55.x88 	&G<O=
 =
 	& &~~~~~~H22; 3 
 
 
 
 
 
 
 
 r|   c                    |                     dd          }|                     |          }d}|Rt          |          dk    r?t          t          t
                   |d         d                             d                    }|7|du r5t          j        d	|            t          j
        |d
d| d|          dS dS )z
        Helper function to raise a mock litellm.RateLimitError error for testing purposes.

        Raises:
            litellm.RateLimitError error when `mock_testing_rate_limit_error=True` passed in request params
        mock_testing_rate_limit_errorNr  ro   r   r   r   TzTlitellm.router.py::_mock_rate_limit_error() - Raising mock RateLimitError for model=r  rX  z , to trigger a rate limit error.)r   r  r  r   )r  r  rk  r   r   r   r'  r   r  r   RateLimitError)r  r  r  rx  available_modelsr   s         r}   re  z,Router._handle_mock_testing_rate_limit_error!  s     9?

+T9
 9
%  ..+.FF%)'C0@,A,AQ,F,F/23CDHHWW K
 *5-55!&tgrtt   (!kkkkk'	    6555r|   r  r   rb  rc  c                    d}|$t          |t                    rt          |          }d}|$t          |t                    rt          |          }t          |t          j                  r||t          |t          j                  r||t          |t          j                  r|t          |t          j                  r|dk    r|t          |          dk    r|t          |t          j	                  r		 |dk    r||dk    r|dS )au  
        1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None
        2. raise an exception for ContentPolicyViolationError if content_policy_fallbacks is not None

        2. raise an exception for RateLimitError if
            - there are no fallbacks
            - there are no healthy deployments in the same model group
        r   Nro   T)
r   r   rk  r   rI  r  NotFoundErrorr   ry  AuthenticationError)	r  r  r   rb  r   r   rc  _num_healthy_deployments_num_all_deploymentss	            r}   rg  zRouter.should_retry_this_errorD  s8   " $% *z:Mt/T/T*'*+>'?'?$ &:ot+L+L&#&#7#7  ug@AA	(4K ugABB	(4KeW233 	KeV233 	(A--%1)**Q..eV788 	
 %)) $q((Ktr|   c                 ,    t          | j        g|R i |S )z~
        Sync wrapper for async_function_with_fallbacks

        Wrapped to reduce code duplication and prevent bugs.
        )r   r  )r  r  r  s      r}   r  zRouter.function_with_fallbacks  s%     "$"DVtVVVvVVVr|   c                     |dS d}|D ]7}t          |                                          d         |k    r
||         } n8|S )a@  
        Returns the list of fallback models to use for a given model group

        If no fallback model group is found, returns None

        Example:
            fallbacks = [{"gpt-3.5-turbo": ["gpt-4"]}, {"gpt-4o": ["gpt-3.5-turbo"]}]
            model_group = "gpt-3.5-turbo"
            returns: ["gpt-4"]
        Nr   )r   keys)r  r   r  rE  r  s        r}   rJ  z/Router._get_fallback_model_group_from_fallbacks  sa     448 	 	DDIIKK  #{22'+K'8$ 3 $#r|   rd  c                    |t          |          dk    rn,|*t          |t                    rt          |          dk    rdS d}t          |d          r!t          |j        d          r|j        j        }t          |d          r|j        }|t          j        |||| j	                  }nt          j        ||| j	                  }|S )	z
        Calculate back-off, then retry

        It should instantly retry only when:
            1. there are healthy deployments in the same model group
            2. there are fallbacks for the completion call
        Nro   r   r  headerslitellm_response_headers)rd  r   response_headersmin_timeout)rd  r   r  )
rk  r   r   r  r  r  r  r   _calculate_retry_afterr   )r  r|  rd  r   r   rb  r  r   s           r}   ri  z"Router._time_to_sleep_before_retry  s    " &3+?+?1+D+D+.55 ,'((1,,1481j!! 	2gaj)&D&D 	2 z11011 	: 9'4"3'!1 ,	  GG 4"3' ,  G r|   c                 T  K   ddl m} 	 |                    dd          }|t          d          |d                             d          dS |d         d                             dd          }|d         d                             d	d          }|d                             d
i           pi }	|	                    dd          }
||
dS t	          |
t
                    rt          |
          }
|                     |
          }|dS |                     |	                                |          }|                    dd          }|                    dd          }|j
        j        }|j
        j        }|                    dd          }|                    dd          }t          | |
           ||
||||dS t          |          }|                    dd          }t                      }|                    d          }t"          j        j                            |
||          }g }|                     |||t,          j        j                             t"          j        j                            |
||          }|                     ||dt,          j        j                             | j                            ||           d{V  |S # t6          $ r?}t9          j        d                    t          |                               Y d}~dS d}~ww xY w)zG
        Track remaining tpm/rpm quota for model in model_list
        r   )RedisPipelineIncrementOperationstandard_logging_objectNzstandard_logging_object is Noner   r   rs  r  r   r"  r?  rs  received_model_nametpmrpmr  deployment_idtotal_tokens%H-%Mr"  current_minuter   )r  increment_valuerz   ro   )increment_listr  zOlitellm.router.Router::deployment_callback_on_success(): Exception occured - {})litellm.types.cachingr  r'  rj  r   r   r   get_deploymentget_router_model_infor  r   r  r  rB   r   rm   strftimerX   TPMr.  r  r   ru   rz   RPMr   async_increment_cache_pipelinery  r   rz  )r  r  completion_responser  r  r  r  deployment_namer  r   r"  deployment_infodeployment_model_infor  r  tpm_litellm_paramsrpm_litellm_paramstpm_model_inforpm_model_infor  r  dtr  tpm_keypipeline_operationsrpm_keyr|  s                              r}   r   z%Router.deployment_callback_on_success  s      	JIIIIIt	HN

)4I I# '. !BCCC&'++J77?"()9"::"F"J"J $# # %%56zBFF!4  $$4599,KKQr
^^D$//&"*FC(( !RB #'"5"5r"5"B"B"*F,0,F,F#2#=#=#?#?,7 -G - -)
 *--eT::C)--eT::C *9)G)K&)8)G)K& &;%>%>ud%K%KN%:%>%>ud%K%KN B,0"$    K*2*2&.&.F#DV#L#L &=&A&A.RS&T&T
 &''!#" " *-3::. ;   NP# $**33#(4'O1     *-3::. ;   $**33#()'O1     j??#6%5 @         
  	 	 	!'ahhFF   
 DDDDD	s3   AK A<K <K 
B6K DK 
L'(4L""L'c                 l   d}|d                              d          n|d         d                              dd          }|d                              di           pi }|                     dd          }||dS t          |t                    rt          |          }|t	          | |          }|S dS )z
        Tracks the number of successes for a deployment in the current minute (using in-memory cache)

        Returns:
        - key: str - The key used to increment the cache
        - None: if no key is found
        Nr   r   r  r   r"  r  )r'  r   r   r   rB   )	r  r  r  r  r  r"  r  r   r  s	            r}   r  z*Router.sync_deployment_callback_on_success\  s     "#''
33; !12:>BB=RVWWK 0155lBGGM2Jd++B"bjtB$$ WW>C(,   C Jtr|   c                    t          j        d           	 |                    dd          }t          |dd          }|                    di           }|                    di           }t          j        j                            |          }	|                    d	d          }
d}|	 t          j        	                    |	
          }|
	|
dk    r|
}n|	|dk    r|}n| j
        }t          |t                    r@|                    d          }|dS t          | |           t          | ||||          }|S t          j        d           dS # t          $ r}|d}~ww xY w)a`  
        2 jobs:
        - Tracks the number of failures for a deployment in the current minute (using in-memory cache)
        - Puts the deployment in cooldown if it exceeds the allowed fails / minute

        Returns:
        - True if the deployment should be put in cooldown
        - False if the deployment should not be put in cooldown
        z1Router: Entering 'deployment_callback_on_failure'r  Nstatus_coder  r   r   )r  r   )r  r   r"  Fr  r  exception_statusr  rs  time_to_cooldownzWRouter: Exiting 'deployment_callback_on_failure' without cooldown. No model_info found.)r   rz  r'  r  r   litellm_core_utilsexception_mapping_utils_get_response_headersutils&_get_retry_after_from_exception_headerr   r   r  rA   r8   ry  )r  r  r  r  r  r  r  r   _model_infoexception_headersdeployment_cooldownheader_cooldown_time_to_cooldownr  rf  r|  s                   r}   r  z%Router.deployment_callback_on_failure  s     	#$WXXX9	

;55I&y-DD $ZZ(8"==N(,,\2>>K ' : R h h#, !i ! !
 #1"4"4_d"K"K"O ,")-"V"V%6 #W # # #.3F!3K3K$7!! ,A1E1E$3!!$($6!+t,, /:t/D/D ( 5@,0"/    3,0%5'0,%6   %+m   u 	 	 	G	s$   C;E &E :E 
E!EE!r  c                 f  K   |d         d                              dd          }|d         d                              dd          }|d                              di           pi }|                     dd          }||dS t          |t                    rt          |          }t	          |          }	t                      }
|
                    d          }t          j        j	        
                    |||	          }| j                            |d
|	t          j        j	                   d{V  dS )z3
        Update RPM usage for a deployment
        r   r   rs  Nr  r   r"  r  r  ro   )r  r.  r  rz   )r'  r   r   r   r   rm   r  rX   r  r.  r  r   async_increment_cacheru   rz   )r  r  r  r  r  r  r  r   r"  r  r  r  r  s                r}   r  z+Router.async_deployment_callback_on_failure  s`      !!12:>BB$
 
 -.z:>>}dSS,-11,CCIr
^^D$''"*FC   	RB<VDD
 

 "%+22. 3 
 
 j..-%	 / 
 
 	
 	
 	
 	
 	
 	
 	
 	
 	
r|   )r   r  c                     d|v rdndS )a  
        Helper to return what the "metadata" field should be called in the request data

        - New endpoints return `litellm_metadata`
        - Old endpoints return `metadata`

        Context:
        - LiteLLM used `metadata` as an internal field for storing metadata
        - OpenAI then started using this field for their metadata
        - LiteLLM is now moving to using `litellm_metadata` for our metadata
        r  r   r{   )r  r  s     r}   '_get_metadata_variable_name_from_kwargsz.Router._get_metadata_variable_name_from_kwargs  s     &86%A%A!!zQr|   c                 0   	 d|v rdnd}t          |          j        t          |          d}|                                D ]c\  }}||ddfvr|||<   ||k    rKt	          |t
                    r6i ||<   ||                                         D ]\  }}|dk    r|||         |<   dt          | j                  dk    r| j                            d           | j        	                    |           | j        ||         d<   |S # t          $ r}|d	}~ww xY w)
z
        When a retry or fallback happens, log the details of the just failed model call - similar to Sentry breadcrumbing
        r  r   )exception_typeexception_stringr~  r  r      r   N)r   rw   r   r  r   r  rk  r   r  r   ry  )	r  r  r|  _metadata_varprevious_modelr  r  
metadata_k
metadata_vs	            r}   rh  zRouter.log_retry  sr   	&8F&B&B""
 
 #'q''"2$'FF N 	G G  ]J8KLLL()N1%%-''Jq$,?,?'46N=1282G2M2M2O2O G G.
J%):::<FN1-j9 4'((1,,$((+++ ''7777;7KF=!"34M 	 	 	G	s   DD 
DDDr  r  c                     |}| j                             ||d          }|!d}| j                             ||dd           n"|dz  }| j                             ||d           |S )zf
        Update deployment rpm for that minute

        Returns:
        - int: request count
        T)r  r  
local_onlyNro   rv   )r  r.  r  rz   )r  r.  r  )r   	get_cache	set_cache)r  r  r  r  request_counts        r}   _update_usagezRouter._update_usage"  s      
,,*:t - 
 
  MJ  =Tr !     QMJ  =T !    r|   c                 d    | j         dS | j         D ]}t          |t                    rd|v r dS dS )NFr   T)r   r   r  )r  fallbacks     r}   _has_default_fallbackszRouter._has_default_fallbacks=  sH    >!5 	  	 H(D))  (??44ur|   r  c                    |j         r0t          |j                   dk    r|j         d         j        dk    rdS |                    d| j                  }|Ad}|D ]7}t          |                                          d         |k    r
||         } n8|dS n|                                 rdS t          j	        d
                    ||                     dS )z
        Determines if a content policy error should be raised.

        Only raised if a fallback is available.

        Else, original response is returned.
        r   content_filterFr   NTzyContent Policy Error occurred. No available fallbacks. Returning original response. model={}, content_policy_fallbacks={})choicesrk  finish_reasonr'  r   r   r  r  r   rz  r  )r  r   r  r  r   rE  r  s          r}   r  z)Router._should_raise_content_policy_errorF  s     	H$4 5 5 9 9"04DDDu#)::&(E$
 $
 
 $/#' 0  		$$Q'500+/;(E 1 $/t 0((** 	4# H  O  O/ 	
 	
 	

 ur|   c                    g }	 |                      |          \  }}t          |t                    rg S n# t          $ r Y nw xY wt	          | |          }g }|D ](}|d         d         |v r|                    |           )||fS )Nr  rH  r   r"  )#_common_checks_available_deploymentr   r  ry  r7   r   )r  r   r  ro  rN  unhealthy_deploymentsr   rs  s           r}   _get_healthy_deploymentszRouter._get_healthy_deploymentsl  s    !#	"&"J"J #K # #A *D11 	 	 	 	D	 !:$(;K!
 !
 !
 %'* 	7 	7J,'-1FFF#**:6666"$444s   /5 
AAc                 >  K   g }	 |                      |          \  }}t          |t                    rg |fS n# t          $ r Y nw xY wt	          | |           d{V }t          |          }g }|D ]'}|d         d         |vr|                    |           (||fS )z
        Returns Tuple of:
        - Tuple[List[Dict], List[Dict]]:
            1. healthy_deployments: list of healthy deployments
            2. all_deployments: list of all deployments
        r  rH  Nr   r"  )r  r   r  ry  r5   setr   )	r  r   r  ro  rN  r  unhealthy_deployments_setr   rs  s	            r}   r  z%Router._async_get_healthy_deployments  s      "$	"&"J"J #K # #A *D11 ,+++, 	 	 	D	 'F$(;K'
 '
 '
 !
 !
 !
 !
 !
 !
 %((=$>$>!$&* 	7 	7J,'-5NNN#**:666"$444s   19 
AAc                 x    t           j        D ],}t          |t                    r|                    |           -dS )a  
        Mimics 'async_routing_strategy_pre_call_checks'

        Ensures consistent update rpm implementation for 'usage-based-routing-v2'

        Returns:
        - None

        Raises:
        - Rate Limit Exception - If the deployment is over it's tpm/rpm limits
        N)r   r"  r   r   rq  )r  rs  rr  s      r}   r  z'Router.routing_strategy_pre_call_checks  sG     !* 	5 	5I)\22 5((444	5 	5r|   r  c           
      p  K   t           j        D ]}t          |t                    r	 |                    ||           d{V  7# t           j        $ r}|t          j        |                    |t          j
                    t          j                                         t          j        |j        |t          j
                    f                                           t!          | |j        ||d         d         | j                   |d}~wt&          $ r}|t          j        |                    |t          j
                    t          j                                         t          j        |j        |t          j
                    f                                           |d}~ww xY wdS )O  
        For usage-based-routing-v2, enables running rpm checks before the call is made, inside the semaphore.

        -> makes the calls concurrency-safe, when rpm limits are set for a deployment

        Returns:
        - None

        Raises:
        - Rate Limit Exception - If the deployment is over it's tpm/rpm limits
        Nr  traceback_exceptionr  targetr  r   r"  r  )r   r"  r   r   async_pre_call_checkry  r  r  async_failure_handlerr  r  r  	threadingThreadfailure_handlerstartr8   r  r   ry  )r  rs  r  r  rr  r|  s         r}   r	  z-Router.async_routing_strategy_pre_call_checks  s     " !* *	 *	I)\22 )(#88EUVVVVVVVVVV-   ".+'==*+4=4H4J4J)- >     "(#.#>"#Y%9%;%;!<    %'''-04)*+,#-l#;D#A)-);    G    ".+'==*+4=4H4J4J)- >     "(#.#>"#Y%9%;%;!<    %'''G5)*	 *	s%   AF1B:DF1BF,,F1r  c           
        K   |}t           j        D ]}t          |t                    r	 |                    |||||           d{V }9# t
          $ r}	|t          j        |                    |	t          j
                    t          j                                         t          j        |j        |	t          j
                    f                                           |	d}	~	ww xY w|S )r  r   r   r~  r  r  Nr  r  )r   r"  r   r   async_filter_deploymentsry  r  r  r  r  r  r  r  r  r  r  )
r  r   r   r~  r  r  r  returned_healthy_deploymentsrr  r|  s
             r}   !async_callback_filter_deploymentsz(Router.async_callback_filter_deployments  s>     ( (;$ * 	 	I)\22 '@@"'0L%-+9-= A         10 !   ".+'==*+4=4H4J4J)- >     "(#.#>"#Y%9%;%;!<    %'''G6 ,+s    A


C)BC$$C)r   c                    |}|                                 D ]\  }}t          |t                    r||z  }n?t          |t                    r|t	          j        |          z  }n|t          |          z  }t          |t                    r||z  }zt          |t                    r|t	          j        |          z  }|t          |          z  }t          j        |                                          }|	                                S )z
        Helper function to consistently generate the same id for a deployment

        - create a string from all the litellm params
        - hash
        - use hash as id
        )
r  r   r   r  jsondumpshashlibsha256encode	hexdigest)r  r  r   
concat_strr  r  hash_objects          r}   r%  zRouter._generate_model_id!  s     !
"((** 	% 	%DAq!S!! %a

At$$ %djmm+

c!ff$
!S!! %a

At$$ %djmm+

c!ff$

nZ%6%6%8%899$$&&&r|   r  _model_name_litellm_paramsr  c                 :   	 t          di |}t          di ||||d}t          j                                        D ],}|j                            |          |j        |         ||<   -|j        j        }|t          j
        ||i           |j        j        }|j        j        |j        j        dz   |z   }t          j
        ||i           |                     |          dur,t          j        d|j         d|j        d	                     dS |                     |          }|                    d
          }	|                     |	|j        j                   |S # t(          $ r,}
| j        rt          j        d|
 d           Y d}
~
dS |
d}
~
ww xY w)a^  
        Create a deployment object and add it to the model list

        If the deployment is not active for the current environment, it is ignored

        Returns:
        - Deployment: The deployment object
        - None: If the deployment is not active for the current environment (if 'supported_environments' is set in litellm_params)
        r$  N)
model_costr  r  TzIgnoring deployment z% as it is not active for environment supported_environmentsr   r   r?  zError creating deployment: 1, ignoring and continuing with other deployments.r{   )rS   rQ   rO   model_fieldsr  r   r'  r   r"  r   register_modelr   r  $deployment_is_active_for_environmentr   warningr  _add_deploymentto_json _add_model_to_list_and_index_mapry  r   r  )r  r  r  r  r  r   rs  fieldr?  r   r|  s              r}   _create_deploymentzRouter._create_deployment=  s]    ;	-;-N-No-N-NN#  !&-&	   J 4@EEGG J J,0077C)3)B5)IK& ",/H#& +     %39K(<H-ACG+U  "    99Z9PP  &- Y:+@  Y  Ygqg|  ~V  hW  Y  Y   t---DDJ&&D&99E11j&;&> 2     	 	 	. %/f!fff   ttttt	s%   DE$ AE$ $
F.FFFc                 >    |j                             d          rdS dS )z
        Check if the deployment is an auto-router deployment.

        Returns True if the litellm_params model starts with "auto_router/"
        zauto_router/TF)r   
startswith)r  r   s     r}   _is_auto_router_deploymentz!Router._is_auto_router_deployment  s'     **>:: 	4ur|   c                 r   ddl m} |j        j        }|j        j        }||t          d          |j        j        }|t          d          |j        j        }|t          d           ||j        |||||           }|j        | j	        v rt          d|j         d	          || j	        |j        <   dS )
z
        Initialize the auto-router deployment.

        This will initialize the auto-router and add it to the auto-routers dictionary.
        r   )rr   Nzzauto_router_config_path or auto_router_config is required for auto-router deployments. Please set it in the litellm_paramszfauto_router_default_model is required for auto-router deployments. Please set it in the litellm_paramszhauto_router_embedding_model is required for auto-router deployments. Please set it in the litellm_params)r  auto_router_config_pathauto_router_configdefault_modelembedding_modelr  zAuto-router deployment z3 already exists. Please use a different model name.)
/litellm.router_strategy.auto_router.auto_routerrr   r   r  r  rj  auto_router_default_modelauto_router_embedding_modelr  r   )r  rs  rr   r  r  r  r  autor_routers           r}   init_auto_router_deploymentz"Router.init_auto_router_deployment  s/    	ONNNNN %= 	  -7,E,X"*/A/I M   %? 	  x   %A 	 "z   $.:!,$;1'+$($
 $
 $
  D$555t**?ttt   4@*/000r|   c           	      Z   |j         d|j         vs|j         d         dS t          d          }|t          d          |t          vrt          dt           d|           |j         d         D ](}|t          vrt          d	t           d| d
|           )||j         d         v rdS dS )a  
        Function to check if a llm deployment is active for a given environment. Allows using the same config.yaml across multople environments

        Requires `LITELLM_ENVIRONMENT` to be set in .env. Valid values for environment:
            - development
            - staging
            - production

        Raises:
        - ValueError: If LITELLM_ENVIRONMENT is not set in .env or not one of the valid values
        - ValueError: If supported_environments is not set in model_info or not one of the valid values
        Nr  TLITELLM_ENVIRONMENT)secret_namezPSet 'supported_environments' for model but not 'LITELLM_ENVIRONMENT' set in .envz#LITELLM_ENVIRONMENT must be one of z. but set as: z&supported_environments must be one of z for deployment: F)r   r   rj  rJ   )r  rs  litellm_environment_envs       r}   r   z+Router.deployment_is_active_for_environment  s%    !)'z/DDD$%=>F4,9NOOO&b   &@@@u6Puu`suu   )*BC 	 	D555  K=W  K  Kgk  K  K  I  K  K   6
 *"78P"QQQ4ur|   c                    t          j        |          }g | _        i | _        i | _        |D ]B}|                    d          }|                    d          }t          |t                    rV|                                D ]A\  }}t          |t                    r'|
                    d          rt          |          ||<   B|                    di           }d|vr|                     ||          }	|	|d<   |                    dd           Ft          |d         t                    r+|d         D ] }
|
|d<   |                     ||||           !)|                     ||||           Dt!          j        d|                                             d	 |D             | _        |                     | j                   d S )
Nr  r   os.environ/r   r"  organization)r  r  r  r  z
Initialized Model List c                     g | ]
}|d          S r  r{   rI  s     r}   rO  z)Router.set_model_list.<locals>.<listcomp>  s    @@@AlO@@@r|   )rw  rx  r   r   r   r  r   r  r  r   r  rl   r%  r'  r   r  r   rz  get_model_namesr   _build_model_name_index)r  r   original_model_listr   r  r  r  r  r  _idorgs              r}   r   zRouter.set_model_list  s   "mJ7702-02- ) !	 !	E))L11K#ii(899O/400 ;+1133 ; ;DAq!!S)) ;all=.I.I ;-7]]* %		, ; ;K ;&&--k?KK$'D!"">488D/J JD +>:  C69ON3++(-$/(7$/	 ,     ''$) +$3 +	 (     	#@(<(<(>(>@@	
 	
 	
 A@Z@@@ 	$$T_55555r|   c                 N   dd l }|j        j        }d}d|v r+|                    d          d         }|t          j        v rd}|r	|}d }d }d }	n^t	          j        |j        j        |j                            dd                     \  }}}}	|t          j        vrt          d|           | j
                            |j        j                   |j        j        +t          |dd           t          |d          |j        _        |j        j        +t          |d	d           t          |d	          |j        _        d
|j        v rd| j                            |j        |                    d                     |j        j        r$| j                            |j        j                   |j                            d          }
|j                            d          }|
Z|Xd
|v rT|
| j        vrt/                      | j        |
<   | j        |
                             ||                    d                     |j                            dg           pg }|D ]v}|                    di           }dD ][}||v rU||                             d          r:||                             dd          }|j                            |d          ||<   \w|"|                     |||j        j                   |                     |j                  r|                     |           |S )Nr   Fr  Tr  r   r  zUnsupported provider - r  r  r   r   team_idteam_public_model_namedataSources
parameters)endpointr  r  r  )rs  r  r   )r   r  )osr   r   rq  r   r  rj   r'  provider_listry  r   r   r  r  r  r  r   add_patternr  r   r"  r   r   rp   r  replaceenviron'_initialize_deployment_for_pass_throughr	  r  )r  rs  r)  r  is_prompt_management_modelr  rq  r  r  r,  _team_id_team_public_model_namedata_sourcesdata_sourcer   	param_keyenv_names                    r}   r  zRouter._add_deployment  s   			 #17%*"-"/"5"5c":":1"="g&WWW-1*% 	Q #F"&"OHH ( /5$.$=$A$A)4% %  # #'*??? O:M O OPPP 	$$Z%>%DEEE
 %)1
E400<,3J,F,FJ%) %)1
E400<,3J,F,FJ%) *''' ++%z'9'9t'9'L'L   $' V4;;J<Q<TUUU(,,Y77","7";";<T"U"U '3...t8886H6J6J)(3%h/;;');););)N)N  
 "044]BGGM2' 	E 	EK __\266F0 E E	&&6)+<+G+G+V+V&%i088KKH(*
x(D(DF9%	E *88%$7 /5 9    ***:S*TT 	D,,
,CCCr|   c                 j   |j         j        du r#ddlm} |j         j        t          j        |j         j                  }ni }|dk    r|                    d          p|j         j        }|                    d          p|j         j	        }|                    d          p|j         j
        }||t          d	          |                    |||
           nZ|                    d          p|j         j        }	|                    d          p|j         j        }
|                    ||	|
           	 dS )a*  
        Optional: Initialize deployment for pass-through endpoints if `deployment.litellm_params.use_in_pass_through` is True

        Each provider uses diff .env vars for pass-through endpoints, this helper uses the deployment credentials to set the .env vars for pass-through endpoints
        Tr   )passthrough_endpoint_routerN	vertex_aivertex_projectvertex_locationvertex_credentialsz]vertex_project, and vertex_location must be set in litellm_params for pass-through endpoints.)
project_idlocationr;  r,  ru  )r  r,  ru  )r   use_in_pass_through>litellm.proxy.pass_through_endpoints.llm_passthrough_endpointsr7  litellm_credential_namer    get_credential_valuesr'  r9  r:  r;  rj  add_vertex_credentialsr,  ru  set_pass_through_credentials)r  rs  r  r   r7  credential_valuesr9  r:  r;  r,  ru  s              r}   r.  z.Router._initialize_deployment_for_pass_through  s    $8D@@      (@L$6$L-E% %!! %'!"k11%))*:;; @!0? 
 &))*;<< A!0@  
 &))*>?? D!0C #
 ")_-D$w   ,BB-,'9 C     &))*55 :!09 
 &)))44 9!08  ,HH(;%# I   
 r|   c                 (   |j         j        }|r|                     |          rdS |                    d          }|                     |           |                     ||j         j                   | j                            |j                   |S )z
        Parameters:
        - deployment: Deployment - the deployment to be added to the Router

        Returns:
        - The added deployment
        - OR None (if deployment already exists)
        NTr   r  r  )	r   r"  r  r  r  r  r   r   r  )r  rs  _deployment_model_id_deployments       r}   add_deploymentzRouter.add_deployment  s      *47 	D$5$56J$K$K 	4 !((d(;;
333 	--
(=(@ 	. 	
 	
 	
 	
 5666r|   r?  removal_idxc                     | j                                         D ]\  }}||k    r|dz
  | j         |<   || j         v r
| j         |= dS dS )a"  
        Helper method to update deployment indices after a deployment has been removed from model_list.

        Parameters:
        - model_id: str - the id of the deployment that was removed
        - removal_idx: int - the index where the deployment was removed from model_list
        ro   N)r   r  )r  r?  rI  r  rE  s        r}   (_update_deployment_indices_after_removalz/Router._update_deployment_indices_after_removal  sm     #'"G"M"M"O"O 	O 	OM3[  GJQw5mDt<<<5h??? =<r|   c                    t          | j                  }| j                            |           ||| j        |<   n?|                    di                               d          || j        |d         d         <   |                    d          }|r5|| j        vr
g | j        |<   | j        |                             |           dS dS )a!  
        Helper method to add a model to the model_list and update both indices.

        Parameters:
        - model: dict - the model to add to the list
        - model_id: Optional[str] - the model ID to use for indexing. If None, will try to get from model["model_info"]["id"]
        Nr   r"  r  )rk  r   r   r   r'  r   )r  r   r?  rE  r  s        r}   r  z'Router._add_model_to_list_and_index_map  s     $/""u%%% >AD1(;;YY|R((,,T22>ORD1%2Ed2KL YY|,,
 	J!FFFDF5jA1*=DDSIIIII	J 	Jr|   c                    	 |j         j        pd}|                     |          }|f|j        |j        k    rdS d}|j         j        }| j        }||v r;||         }|1| j                            |           |                     ||           |                     |           |S # t          $ r,}| j
        rt          j        d| d           Y d}~dS |d}~ww xY w)z
        Add or update deployment
        Parameters:
        - deployment: Deployment - the deployment to be added to the Router

        Returns:
        - The added/updated deployment
        r  r  Nr?  rI  r  zError upserting deployment: r  )r   r"  r  r   r   r   r  rK  rH  ry  r   r   rz  )r  rs  rF  _deployment_on_routerrI  r  deployment_fast_mappingr|  s           r}   r&  zRouter.upsert_deployment  sF   &	#-#8#;#Ar :>:M:M- ;N ; ;! %0,0E0TTT4 .2 * 5 8*.*O' $;;;"9-"HK".++K888EE%2 F   
 :666 	 	 	. %+g1ggg   ttttt	s#   6B& A+B& &
C0CCCr"  c                     d}|| j         v r| j         |         }	 |3| j                            |          }|                     ||           |S dS # t          $ r Y dS w xY w)z
        Parameters:
        - id: str - the id of the deployment to be deleted

        Returns:
        - The deleted deployment
        - OR None (if deleted deployment not found)
        NrN  )r   r   r  rK  ry  )r  r"  deployment_idxr  s       r}   delete_deploymentzRouter.delete_deploymentF  s     666!B2FN	)**>::==^ >    t 	 	 	44	s   4A 
AAc                    || j         v r| j         |         }| j        |         }t          |t                    rt	          di |S t          |t                    r|S t          d                    t          |                              dS )l
        Returns -> Deployment or None

        Raise Exception -> if model found in invalid format
        zModel invalid format - {}Nr{   )r   r   r   r  rQ   ry  r  r   )r  r?  rE  r   s       r}   r  zRouter.get_deployment`  s     t<<<7ACOC(E%&& Q!**E***E:.. Q ; B B4;; O OPPPtr|   c                     |                      |          }|dS t          di |j                            d                              d          S )zE
        Returns -> dict of credentials for a given model id
        r  NTr   r{   )r  rN   r   r  )r  r?  rs  s      r}   get_deployment_credentialsz!Router.get_deployment_credentialss  sg     ((((;;
4& 
 
'222EE
 

*$*
'
'	(r|   model_group_namec                    | j         D ]y}|d         |k    rkt          |t                    rt          di |c S t          |t                    r|c S t	          d                    t          |                              zdS )rU  r  zModel Name invalid - {}Nr{   )r   r   r  rQ   ry  r  r   )r  rX  r   s      r}   "get_deployment_by_model_group_namez)Router.get_deployment_by_model_group_name~  s     _ 	S 	SE\"&666eT** S%.......z22 S LLL#$=$D$DT%[[$Q$QRRR 7 tr|   r  c                     d S r8  r{   r  rs  r  r"  s       r}   r  zRouter.get_router_model_info  	     	r|   c                     d S r8  r{   r\  s       r}   r  zRouter.get_router_model_info  r]  r|   c                    |.|                      |          }||                    d          }|t          d          |                    di                               dd          }|*|                    di                               dd          }|}t	          j        |                    di                               d	d
          t          di |                    di                     \  }}}	}	|dk    r|t          j        d| d           n|dk    r|}| j	        
                    |          }
d|v r|
|
D ]}	 |                    di                               d          |                    di                               d          k    r+|                    di                               d	          } n# t          $ r Y w xY w||}|                    d                    |                    sd                    ||          }n|}t	          j        |          }|                    di           }|                    |           |S )a  
        For a given model id, return the model info (max tokens, input cost, output cost, etc.).

        Augment litellm info with additional params set in `model_info`.

        For azure models, ignore the `model:`. Only set max tokens, cost values if base_model is set.

        Returns
        - ModelInfo - If found -> typed dict with max tokens, input cost, etc.

        Raises:
        - ValueError -> If model is not mapped yet
        Nr  Tr   zDeployment not foundr   
base_modelr   r   r  r   r   azurez Could not identify azure model 'z'. Set azure 'base_model' for accurate max tokens, cost tracking, etc.- https://docs.litellm.ai/docs/proxy/cost_tracking#spend-tracking-for-azure-openai-modelsr   r"  z{}/z{}/{}r  r{   )r  r  rj  r'  r   rj   rS   r   r  r   routery  r  r  get_model_infor   )r  rs  r  r"  rG  r`  r   rq  r  rN  potential_modelspotential_modelmodel_info_namer   user_model_infos                  r}   r  zRouter.get_router_model_info  s   & >--r-::K&(333FF
3444  ^^L"5599,MM
#(8"==AA,PTUUJ -4,D..!1266::7BGG)QQJNN;KR,P,PQQ-
 -
 -
)#Q '))j.@!' [6  [  [  [    !G++E#2889LMMe|| 0 <'7 
 
O	*..|R@@DD  '^^L"==AA$GGH H %4$7$78H"$M$M$Q$Q '% %E "EH %    = E-@ A ABB 	$%nn-@%HHOO#O+/BBB
 %..r::/***s   	A=G		
GGc                 b    | j         D ]&}d|v r d|d         v r||d         d         k    r|c S 'dS )z
        For a given model id, return the model info

        Returns
        - dict: the model in list with 'model_name', 'litellm_params', Optional['model_info']
        - None: could not find deployment in list
        r   r"  Nr   )r  r"  r   s      r}   rd  zRouter.get_model_info  sS     _ 	! 	!Eu$$|1D)D)D|,T222 LLLtr|   c                 r    |                      |          }|dS |d         }|                     |          S )zT
        Return list of all models in the same model group as that model id
        )r"  Nr  r  )rd  r  )r  r"  r   r  s       r}   get_model_groupzRouter.get_model_group  sF    
 ((B(//
4-
""j"999r|   r  c           	      4   ddl m} d}d}d}	 t          j                            |          }n# t
          $ r Y nw xY w	 t          j        |          }n# t
          $ r Y nw xY w	 |R|                    dd          }|:t          j        |          }|#|pi } |t          t          |          |          }n# t
          $ r Y nw xY w|G|Et          t           |t          t          |          
                                |                    }n||}|S )z
        For a given model id, return the model info

        1. Check if model_id is in model info
        2. If not, check if litellm model name is in model info
        3. If not, return None
        r   )_update_dictionaryNr  r`  )litellm.utilsrn  r   r  r'  ry  rd  r   r  ra   rw  )	r  r?  r  rn  r   custom_model_infolitellm_model_name_model_infor`  base_model_infos	            r}   get_deployment_model_infoz Router.get_deployment_model_info  s    	544444*.
,0=A%	 ' 2 6 6x @ @ 	 	 	D		,3,B,T,T,T)) 	 	 	D		 ,.22<FF
)&-&<:&N&N&NO&2,=,C),>,> 77-- -)  	 	 	D	 (-J-V""<==BBDD%  JJ +66Js0   . 
;;A 
A"!A"&AB; ;
CCuser_facing_model_group_namec                 
   d}d}d}d}|                      |          }|dS |D ]}d}	d|v r|d         |k    rd}	n d|v r| j                            |          d}	|	s;t          di |d         }
|
j        }|                    di           }|                    di           }d}||                    dd          }||                    dd          }||                    dd          }d}||                    d	d          }||                    d	d          }||                    d	d          }	 |                    d
d          }||                     ||
j                  }nd}n# t          $ r d}Y nw xY wd\  }}	 t          j
        |
j        |
j                  \  }}}}nU# t          j        j        $ r>}t          j        d                    t#          |                               Y d}~nd}~ww xY w|_t          j        ||          }|g }|                    di           }|                    dd          }t'          |ddddd|||d
  
        }|t)          di ||gd|}n||j        vr|j                            |           |                    dd          -|d         %|j        |d         |j        k    r|d         |_        |                    dd          -|d         %|j        |d         |j        k    r|d         |_        |                    dd          %|j        |d         |j        k    r|d         |_        |                    dd          %|j        |d         |j        k    r|d         |_        |                    dd          	 |d         du rd|_        |                    dd          |d         du rd|_        |                    dd          |d         du rd|_        |                    dd          |d         du rd|_        |                    dd          |d         du rd|_        |                    dd          |d         du rd|_         |                    dd          |d         |d         |_!        |                    dd          ||                    d          }|                    d	d          ||                    d	          }|	|d}||z  }|	|d}||z  }||||_"        |||_#        |||_        |S ) z
        For a given model group name, return the combined model info

        Returns:
        - ModelGroupInfo if able to construct a model group
        - None if error constructing model group info
        Nr  Fr  Tr   r   r  r  r"  )r?  r  )r  r  r#  z.litellm.router.py::get_model_group_info() - {}moder   r   )
r  
max_tokensmax_input_tokensmax_output_tokensinput_cost_per_tokenoutput_cost_per_tokenlitellm_providerrv  supported_openai_paramssupports_system_messages)r  	providersrx  ry  rz  r{  "supports_parallel_function_callingsupports_visionsupports_function_callingsupports_web_searchsupports_url_contextsupports_reasoningr}  r{   )$r  r   rc  rS   #configurable_clientside_auth_paramsr'  rs  r   ry  r   rj   r  
exceptionsBadRequestErrorr   r  r  r   get_supported_openai_paramsModelMapInforU   r  r   rx  ry  rz  r{  r  r  r  r  r  r  r}  r  r  )r  r  rt  model_group_info	total_tpm	total_rpmr  r   r   is_matchr   model_litellm_paramsmodel_info_dict_deployment_tpm_deployment_rpmr?  r   r  r  rN  r|  r}  db_model_inforv  s                           r}   _set_model_group_infozRouter._set_model_group_info@  s    6:#'	#'	SW+((K(@@
4 E	- E	-EH%%%*=*L*L%%'--k::F +FFe4D.EFFN B 0
 $)99-=r#B#B #iib99O .2O&"'))E4"8"8&"6":":5$"G"G&"1"5"5eT"B"B .2O&"'))E4"8"8&"6":":5$"G"G&"1"5"5eT"B"B	"*..tT::'!%!?!?!)n6J "@ " "JJ "&J " " "!


" +1'M<4;4L(.(6(J5 5 51|Q %5   %+DKKCPQFFSS       
 !*1*M'\+ + +' +2.0+ !&		, ; ;$((88)##%)&*)**+%1,C-1  
  '#1 $ $'C&2^  %$ $    '7'AAA$.55lCCCNN#5t<<H"#56B(9A%&89*;< < 9CCU8V$5NN#6==I"#67C(:B%&9:*<= = :DDW9X$6>>"8$??K$9A!"89&;< < =G.=$9 >>"94@@L$:B!"9:&<= = >H/>$: NN#GNN "#GHDPPJN$GNN#4d;;G"#45==7;$4NN#>EEQ"#>?4GGAE$>NN#8$??K"#89TAA;?$8NN#94@@L"#9:dBB<@$9 NN#7>>J"#78D@@:>$7NN#<dCCO"#<=I?I1@$< >>%..:?V&0nnU&;&;O>>%..:?V&0nnU&;&;O*$ !I_,	*$ !I_,	'$'0 $$'0 $ 3>7 !D  s*   :7E22F F
%F00H4G==Hc                    || j         v rh| j         |         }t          |t                    r|}n,t          |t                    r|d         du rdS |d         }ndS |                     ||          S |                     ||          S )z
        For a given model group name, return the combined model info

        Returns:
        - ModelGroupInfo if able to construct a model group
        - None if error constructing model group info or hidden model group
        hiddenTNr   )r  rt  )r   r   r   r  r  )r  r  r  _router_model_groups       r}   get_model_group_infozRouter.get_model_group_info(  s     $000)+6D$$$ &*##D$'' >T))4*.w-''t--/-8 .    ))#+ * 
 
 	
r|   c                 `  K   t                      }|                    d          }g }g }|                     |          }|dS |D ]}|                    di                               d          }|d                             d          }	||	K|                    t
          j        j                            ||	|	                     |                    t
          j	        j                            ||	|	                     ||z   }
| j
                            |

           d{V }|dS |dt          |                   }|t          |          d         }d}|#|D ] }t          |t                    r	|d}||z  }!d}|#|D ] }t          |t                    r	|d}||z  }!||fS )z
        Returns current tpm/rpm usage for model group

        Parameters:
        - model_group: str - the received model name from the user (can be a wildcard route).

        Returns:
        - usage: Tuple[tpm, rpm]
        r  r  Nr8  r   r"  r   r   )r"  r   r  )r  r   )rm   r  r  r'  r   rX   r  r.  r  r  r   async_batch_get_cacherk  r   r   )r  r  r  r  tpm_keysrpm_keysr   r   r"  r  combined_tpm_rpm_keyscombined_tpm_rpm_valuestpm_usage_listrpm_usage_list	tpm_usagerl  	rpm_usages                    r}   get_model_group_usagezRouter.get_model_group_usageG  sD      
 
 ! ((K(@@
: 	 	E %		, ; ; ? ? E EB+01A+B+F+F, ,M z]2OO#)00'#1 1     OO#)00'#1 1      !)8 3(,
(H(H& )I )
 )
 #
 #
 #
 #
 #
 #
 #*:)@3x==)Q)@X)Q $(	%# # #a%% # ($%	NI#'	%# # #a%% # ($%	NI)##r|   )maxsizec                 ,    |                      |          S )z
        Cached version of get_model_group_info, uses @lru_cache wrapper

        This is a speed optimization, since set_response_headers makes a call to get_model_group_info on every request
        )r  )r  r  s     r}   _cached_get_model_group_infoz#Router._cached_get_model_group_info  s     ((555r|   c                   K   |                      |          }||j        |j        }nd }||j        |j        }nd }||i S |                     |           d {V \  }}i }|||pdz
  |d<   ||d<   |||pdz
  |d<   ||d<   |S )Nr   x-ratelimit-remaining-tokenszx-ratelimit-limit-tokensx-ratelimit-remaining-requestszx-ratelimit-limit-requests)r  r  r  r  )r  r  r  	tpm_limit	rpm_limitcurrent_tpmcurrent_rpmreturned_dicts           r}   get_remaining_model_group_usagez&Router.get_remaining_model_group_usage  s      <<[II',<,@,L(,III',<,@,L(,III!2I)-)C)CK)P)P#P#P#P#P#P#P [ <E q=M89 9BM45 >G q?M:; ;DM67r|   c                   K   t          |t                    rt          |d          rt          |j        t                    r~|j                            di            ||j        d         d<   |j        d         }d|vrBd|vr>|<|                     |           d{V }|                                D ]\  }}||||<   |S )a  
        Add the most accurate rate limit headers for a given model response.

        ## TODO: add model group rate limit headers
        # - if healthy_deployments > 1, return model group rate limit headers
        # - else return the model's rate limit headers
        rv  r  zx-litellm-model-groupr  r  N)r   r   r  rv  r  r   r  r  )r  r  r  r  remaining_usageheaderr.  s          r}   rv  zRouter.set_response_headers  s      x++	;"233	; 82D99	;
 #../CRHHH  #$89' "*!89M!N /6HHH4<NNN+(,(L(L) ) # # # # # # &5%:%:%<%< ; ;MFE(5:*62r|   c                     | j                                          t          |          D ]O\  }}|                    d          }|r3|| j         vr
g | j         |<   | j         |                             |           PdS )z
        Build model_name -> deployment indices mapping for O(1) lookups.
        
        This index allows us to find all deployments for a given model_name in O(1) time
        instead of O(n) linear scan through the entire model_list.
        r  N)r   clearrR  r'  r   )r  r   rE  r   r  s        r}   r  zRouter._build_model_name_index  s     	-33555#J// 	N 	NJC<00J NT%JJJHJD9*E5jAHHMMM	N 	Nr|   c                 r   g | _         t          |          D ]\  }}|                    di           }|                    d          }|V|                    dd          }|                    di           }|                     ||          }d|vri |d<   ||d         d<   |                     ||           dS )z
        Build model index from model list to enable O(1) lookups immediately.
        This is called during initialization to avoid the race condition where
        requests arrive before model_id_to_deployment_index_map is populated.
        r   r"  Nr  r  r   r  )r   rR  r'  r%  r  )r  r   rN  r   r   r?  r  r   s           r}   r   z.Router._build_model_id_to_deployment_index_map  s     !*-- 	R 	RHAu<44J!~~d++H "YY|R88
!&+;R!@!@22:~NNu,,*,E,',4l#D)111QQQQ	R 	Rr|   exclude_team_modelsc                    g }| j         D ]w}d|v rqd|d         v rg|d         d         }|r|d                             d          r<|"|d         |k    r|                    |           `||                    |           x|S )z\
        if 'model_name' is none, returns all.

        Returns list of model id's.
        r   r"  r$  Nr  )r   r'  r   )r  r  r  idsr   r"  s         r}   get_model_idszRouter.get_model_ids  s     _ 	# 	#Eu$$|1D)D)D<(.& 5+>+B+B9+M+M )eL.AZ.O.OJJrNNNN'JJrNNN
r|   candidate_idc                     || j         v S )a  
        O(1) membership check for a deployment ID without allocating large lists.

        Note: Call sites may pass a variable named `model` when it actually
        contains a deployment ID. This helper expects the deployment ID string.

        Uses the existing `model_id_to_deployment_index_map` which is kept
        in sync by `_build_model_id_to_deployment_index_map` and model-list
        mutation helpers.
        )r   )r  r  s     r}   r  zRouter.has_model_id  s     tDDDr|   team_model_namer$  c                     |                      ||          }|sdS |D ]F}|                    di                               d          |k    r|                    d          c S GdS )z
        Map a team model name to a team-specific model name.

        Returns:
        - deployment id: str - the deployment id of the team-specific model
        - None: if no team-specific model name is found
        r  r$  Nr   r$  r  )r  r'  )r  r  r$  r;  r   s        r}   map_team_modelzRouter.map_team_model)  s     $$$QQ 	4 	/ 	/Eyyr**..y99WDDyy..... E tr|   c                     |@|d                              d          |k    r!||d                              d          k    rdS ||d         |k    rdS dS )zU
        Get the team-specific model name if team_id matches the deployment.
        Nr   r$  r%  Tr  Fr'  )r  r  r   r$  s       r}   should_include_deploymentz Router.should_include_deployment;  sk     l#''	22g==eL1556NOOOO4#l(;z(I(I4ur|   model_aliasc                    g }|| j         v r~| j         |         }|D ]m}| j        |         }|                     |||          rF|/t          j        |          }||d<   |                    |           X|                    |           nnz|xt          | j                  D ]c\  }}|                     |||          rF|/t          j        |          }||d<   |                    |           N|                    |           d|S )z
        Return all deployments of a model name

        Used for accurate 'get_model_list'.

        if team_id specified, only return team-specific models
        
        Optimized with O(1) index lookup instead of O(n) linear scan.
        )r  r   r$  Nr  )r   r   r  rw  rx  r   rR  )	r  r  r  r$  returned_modelsindicesrE  r   alias_models	            r}   _get_all_deploymentszRouter._get_all_deploymentsK  sW    68 >>>;JGG  
6 
6,11) 2   6 #.&*mE&:&:4?L1'..{;;;;'..u555
6   (88 	6 	6
U11) 2   6 #.&*mE&:&:4?L1'..{;;;;'..u555r|   c                 D   |                                  pg }g }|D ]}|                    d          }|                     |          r/|                     ||          }|r|                    |           [|                    |                    dd                     |S )u  
        Returns all possible model names for the router, including models defined via model_group_alias.

        If a team_id is provided, only deployments configured with that team_id (i.e. team‐specific models)
        will yield their team public name.
        r   )rs  r$  r  r  )r  r'  _is_team_specific_model_get_team_specific_modelr   )r  r$  deploymentsr   rs  r   r  s          r}   r  zRouter.get_model_names}  s     ))++1r% 		E 		EJ#55J++J77 E"&"?"?)7 #@ # # # 8&&777"":>>,#C#CDDDDr|   c                     |                     d          pi }|dS ||                     d          k    r|                     d          S dS )a  
        Get the team-specific model name if team_id matches the deployment.

        Args:
            deployment: DeploymentTypedDict - The model deployment
            team_id: Optional[str] - If passed, will return router models set with a `team_id` matching the passed `team_id`.

        Returns:
            str: The `team_public_model_name` if team_id matches
            None: If team_id doesn't match or no team info exists
        r   Nr$  r%  r  )r  rs  r$  r   s       r}   r  zRouter._get_team_specific_model  sT     &0^^L%A%A%GR
4jnnY////>>":;;;tr|   r   c                 J    t          |o|                    d                    S )z
        Check if model info contains team-specific configuration.

        Args:
            model_info: Model information dictionary

        Returns:
            bool: True if model has team-specific configuration
        r$  )rr  r'  )r  r   s     r}   r  zRouter._is_team_specific_model  s#     J<:>>)#<#<===r|   c                 J   g }| j                                         D ]\  }}|||k    rt          |t                    r|}n6t          |t                    r t          di |}|d         du rR|d         }n\|                    |                     ||                     |S )z
        Helper function to get model list from model alias.

        Used by `.get_model_list` to get model list from model alias.
        Nr  Tr   )r  r  r{   )r   r  r   r   r  rZ   r+  r  )r  r  r  r  model_value_router_model_name_model_values          r}   get_model_list_from_model_aliasz&Router.get_model_list_from_model_alias  s     68(,(>(D(D(F(F 	 	$K%+*C*C+s++ 	*5""K.. 8GG;GG)T11)5g)>&&""))1{ *      r|   c                    g }|*|                     |                     ||                     |                     |                     |                     t          |          dk    r| j                            |          pg }|@|| j        v r7| j        |                             |          pg }|                     |           |-|+|D ](}t          di |}||d<   |                    |           )|
|| j	        z  }|S )z
        Includes router model_group_alias'es as well

        if team_id specified, returns matching team-specific models
        Nr  r  r   r  r{   )
r+  r  r  rk  r   rc  r   rR   r   r   )r  r  r$  r  potential_wildcard_models#potential_team_only_wildcard_modelsr  deployment_typed_dicts           r}   r  zRouter.get_model_list  s`    68!""))Z)QQ   	00J0GG	
 	
 	
 1$$(,(;(A(A*(M(M(SQS% "w$2K'K'K-g6<<ZHHNB 4 *007   %*C*O2 B BA,?,D,D!,D,D):D),7#**+@AAAAt.Or|   model_access_groupc                 p   ddl m}  |t                    }|                     ||          }|r|D ]}|                    d          }|rj|                    dg           pg D ]Q}	|*|	|k    r#|d         }||	                             |           .|d         }||	                             |           R|S )a  
        If model_name is provided, only return access groups for that model.

        Parameters:
        - model_name: Optional[str] - the received model name from the user (can be a wildcard route). If set, will only return access groups for that model.
        - model_access_group: Optional[str] - the received model access group from the user. If set, will only return models for that access group.
        - team_id: Optional[str] - the team id, to resolve team-specific models
        r   r   r  r   r   Nr  )collectionsr   r   r  r'  r   )
r  r  r  r$  r   r   r   r  r  groups
             r}   get_model_access_groupszRouter.get_model_access_groups   s     	,+++++#D))((J(PP
 	D 
D 
DeeL11 D!,"!E!E!K D D-9$(:::-.|_
 -e 4 ; ;J G G G)*<J)%077
CCCCr|   c                     |                      |          }t          |          dk    rdS |                    |g           }|D ] }| j                            |           dS !dS )zG
        Return True if model access group is a wildcard route
        )r  r   Fr  NT)r  rk  r'  r   rc  )r  r  r   r;  r   s        r}   )_is_model_access_group_for_wildcard_routez0Router._is_model_access_group_for_wildcard_route"  s     441 5 
 
 }""5""#5r:: 	 	E"(((77Ctt D ur|   c                     t          |           }i }g d}|D ]C}||v r||         ||<   |dk    r,| j        dk    r!| j        j                                        ||<   D|S )a  
        Get router settings method, returns a dictionary of the settings and their values.
        For example get the set values for routing_strategy_args, routing_strategy, allowed_fails, cooldown_time, num_retries, timeout, max_retries, retry_after
        )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )varsr   r4  r,  r  )r  	_all_vars_settings_to_returnvars_to_includevars        r}   get_settingszRouter.get_settings9  s    
 JJ	 
 
 
  # 	Y 	YCi+4S>#C(...)-DDD+/+D+Q+V+V+X+X#C(""r|   c                    g d}g d}|                                  }|D ]}||v r||v r't          ||                   }t          | ||           1|dk    rC|d         ||         k    r1|                     ||         |                    di                      t          | |||                    t          j        d                    |                     t          j        d|                                              dS )	z-
        Update the router settings.
        )r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   zSetting {} is not allowedzUpdated Router settings: N)r  r   r  r   r'  r   rz  r  )r  r  _allowed_settings_int_settings_existing_router_settingsr  _casted_values          r}   update_settingszRouter.update_settingsZ  sO   

 
 

 
 
 %)$5$5$7$7! 	U 	UC'''-''$'s$4$4MD#}5555 11156HIVTW[XX22-3C[28** 73 3 3    D#vc{3333%+,G,N,Ns,S,STTTT#$U@Q@Q@S@S$U$UVVVVVr|   c                    |d         d         }t          |          }|dk    rid                    |          }| j                            |d|          }|3t	          j        | |           | j                            |d|          }|S |d	k    r_|                    d
          du r$| d}| j                            |d|          }|S | d}| j                            |d|          }|S |                    d
          du r#| d}| j                            ||          }|S | d}| j                            ||          }|S )a  
        Returns the appropriate client based on the given deployment, kwargs, and client_type.

        Parameters:
            deployment (dict): The deployment dictionary containing the clients.
            kwargs (dict): The keyword arguments passed to the function.
            client_type (str): The type of client to return.

        Returns:
            The appropriate client based on the given client_type and kwargs.
        r   r"  r  z{}_max_parallel_requests_clientTr  r  r  N)r  r   r4  r  _stream_async_client_async_client_stream_client)r  r  _client)r   r  r   r  r0    set_max_parallel_requests_clientr'  )r  rs  r  r  r?  r  	cache_keyr  s           r}   r  zRouter._get_client  s    l+D1+LV+T+T1119@@JJIZ))$AQ *  F ~%F,0
    --!dEU .   MG##zz(##t++'===	--!dEU .   '666	--!dEU .   zz(##t++'777	--!4D .   '000	--!4D .   r|   c           	      
   t          j        d|            t          j        |          }g }	 t	          j        |          }nM# t          $ r@}t          j        d                    t          |                               |cY d}~S d}~ww xY wd}	d}
d}t          |          }t                      }|                    d          }| d| }| j                            |d	|
          pi }t          |          D ]v\  }}|                    di           }|                    di           }	 |                    dd          }||                    dd          }|                     ||          }|p|                    dd          }t%          |t&                    rv|                    dd          `t%          |d         t(                    rE||d         k    r9|                    |           d	}	|
d                    ||d         |          z  }
nK# t          $ r>}t          j        d                    t          |                               Y d}~nd}~ww xY w|                    dd          }| j                            |d	|
          pd}t%          |t&                    r| j        dk    r|                    |d          ||<   t1          |||                   }t%          |t&                    rV|                    dd          @t%          |d         t(                    r%|d         |k    r|                    |           d	}V|^|                    d          I|                    d          }|2t3          t5          d!i ||          s|                    |           |t          j        du rt	          j        |t5          d!i |          \  }}}}t	          j        ||          }|t          j                            |          }dg}|                                 D ]9\  } }!| |vr0| |v r,t          j        d|             |                    |           :xtC          |          tC          |          k    rC	 |d	u rtE          |          |	d	u r*t	          j#        d                    |
          |d           tC          |          dk    r'tI          |          D ]}|%                    |           tC          |          dk    rt          j        &                    |          }|S )"a  
        Filter out model in model group, if:

        - model context window < message length. For azure openai models, requires 'base_model' is set. - https://docs.litellm.ai/docs/proxy/cost_tracking#spend-tracking-for-azure-openai-models
        - filter models above rpm limits
        - if region given, filter out models not in that region / unknown region
        - [TODO] function call and model doesn't support function calling
        z2Starting Pre-call checks for deployments in model=)r~  zllitellm.router.py::_pre_call_checks: failed to count tokens. Returning initial list of deployments. Got - {}NFr  r  z:rpm:Tr  r   r   r`  r  r   rx  z%Model={}, Max Input Tokens={}, Got={}zAn error occurs - {}r"  r   r   r  allowed_model_region)r   r  ra  r#  )passed_paramsresponse_formatz1INVALID MODEL INDEX @ REQUEST KWARG FILTERING, k=r  z~litellm._pre_call_checks: Context Window exceeded for given call. No models have context window large enough for this call.
{}r  r{   )'r   rz  rw  rx  r   token_counterry  r  r  r   r   rm   r  r   r  rR  r'  r  r   r  r   r   r  r   maxrn   rS   drop_paramsrj   r  r  get_non_default_paramsr  rk  r\   rI  reversedr  _get_order_filtered_deployments)"r  r   r   r~  r  _returned_deploymentsinvalid_model_indicesinput_tokensr|  _context_window_error_potential_error_str_rate_limit_errorr  r  r  r  model_group_cacherE  rs  r  r  r`  r   r?  current_request_cache_localcurrent_requestr  r  rN  r}  r  special_paramsr  r  s"                                     r}   _pre_call_checkszRouter._pre_call_checks  s     	#HHH	
 	
 	
 !%.A B B "	)"0(CCCLL 	) 	) 	)!'~  F  FFF   
 )((((((	) !&!!<^LL W--1111J  ?O !    	 	  ))>?? k	> k	>OC(nn-=rBBO$..r::KW(__\4@@
%!0!4!4\4!H!HJ!77)u 8  
 #Ho&9&9'4&H&H z400!"'94@@L #:.@#A3GG!(:6H+III-44S99904-,CJJ %z2D'E| ,
 ! W W W%/0F0M0McRSff0U0UVVVVVVVVW #tR00H 
$$ TDT %    	 ( ,d33!)-EEE.?.C.CHa.P.P!(+"%/1B81L# #
 55
!'++E488D #?5#93??!+E2oEE-44S999,0)  *"&&'=>>J'5'9'9:P'Q'Q$'3,'5'H'H'H'H-A   ! .44S999  )g.AU.J.J3:3K0Q0Q0Q0Q4 4 40*Aq +2*M5H+ + +' +2 *1)M)M&4 *N * *& '8%8N 2 8 8 : : > >1$;;;^@S@S17 WTU W W   288===$%%-B)C)CCC !D((/    '$..8 ^  e  e,   !#    $%%)) 566 / /%))#.... $%%))$+M$Q$Q%% %! %$s6   A 
B5B
B
B2C&H
I#%4II#c                 v    || j         vrdS | j         |         }t          |t                    r|}n|d         }|S )z
        Get the model from the alias.

        Returns:
        - str, the litellm model name
        - None, if model is not in model group alias
        Nr   )r   r   r   )r  r   _items      r}   r  zRouter._get_model_from_alias  sK     ...4&u-eS!! 	#EE'NEr|   c                 *    fd| j         D             S )z6
        Get the deployment by litellm model.
        c                 8    g | ]}|d          d         k    |S )r   r   r{   )rJ  r  r   s     r}   rO  z;Router._get_deployment_by_litellm_model.<locals>.<listcomp>  s.    TTTaa0@.A'.Je.S.S.S.S.Sr|   rj  )r  r   s    `r}    _get_deployment_by_litellm_modelz'Router._get_deployment_by_litellm_model  s"     UTTT4?TTTTr|   r  c                    d}|X|                     d          pi }|                     d          pi }|                     d          p|                     d          }|du r||                     |          fS |                     |          rO|                     |          }	|	$|	j        j        }
|
|	                    d          fS t          d	| d
          |                     |          }||}|| j	        vr| j
                            |          }|r||fS |0|| j        v r'| j        |                             |          }|r||fS | j        (t          j        | j                  }||d         d<   ||fS |                     |          }t#          |          dk    r|                     |          }t%          j        d|            t#          |          dk    r`|                     |          d| d                    |          }nd| d                    |          }t-          j        ||d          t,          j        r |t,          j        v rt,          j        |         }||fS )aF  
        Common checks for 'get_available_deployment' across sync + async call.

        If 'healthy_deployments' returned is None, this means the user chose a specific deployment

        Returns
        - str, the litellm model name
        - List, if multiple models chosen
        - Dict, if specific model chosen
        Nr   r  user_api_key_team_idTr  r  r   zBLiteLLM Router: Trying to call specific deployment, but Model ID :z does not exist in Model ID mapr   r   r  r   zinitial list of deployments: zYou passed in model=z+. There is no 'model_name' with this stringz1. There are no healthy deployments for this modelr  r  )r'  r  r  r  r   r   r  rj  r  r   r   get_deployments_by_patternr   r   rw  rx  r  rk  r   rz  r  r  r   r  model_alias_map)r  r   r~  r  r  r  request_team_idr   r  rs  deployment_model_model_from_aliaspattern_deploymentsupdated_deploymentr   r  s                   r}   r  z*Router._common_checks_available_deployment  sT   & *.%%))*55;H-112DEEK&ll&  >!%%&<==  $&&$??e?LLLLu%% 	,,e,<<J%#-#<#B ')>)>D)>)Q)QQQ{UZ{{{   !66U6CC(%E((("&"5"P"P #Q # # # 2111  +#t'@@@&*&?#',, -   $
 ' 6 "555 &2%)]+& &" AF"#34W=000 #7757II"##q(("&"G"Ge"G"T"T#A,?AA	
 	
 	
 "##q((""e"44<ccccjj  jiiipp  )    " 	u0G'G'G+E )))r|   c           	        K   ddl m} |                     |||||          \  }} |||          }t          |t                    r|S t          | |           d{V }	t          j        d|	            t          j        d|	            |                     ||		          }| 	                    ||| t          t          t                   |          nd||
           d{V }| j        r9|7|                     |t          t          t                   |          ||          }t!          | ||||                     |                     d{V }t%          |          dk    rt'          | ||           d{V }
|
|S )z
        Get the healthy deployments for a model.

        Returns:
        - List[Dict], if multiple models chosen
        *OR*
        - Dict, if specific model chosen
        r   )filter_team_based_modelsr   r~  r  r  r  )r   r  rH  Nzasync cooldown deployments: zcooldown_deployments: r   cooldown_deploymentsr  r   r   r~  r  )llm_router_instancer   r  r   r  r  r   r  )r  r  r  r   r  r5   r   rz  _filter_cooldown_deploymentsr  r   r   rE   r   r  r
   r*   r  rk  r=   )r  r   r  r~  r  r  r  r  r   r  r  s              r}   r  z$Router.async_get_healthy_deployments  s     " 	ONNNNN%)%M%M 3) &N &
 &
"" 76 3)
 
 

 )400 	'&&%D$(;K&
 &
 &
  
  
  
  
  
  
 	#A+?AA	
 	
 	
 	#$S=Q$S$STTT"?? 3!5 @ 
 

 %)$J$J 3:B:NT*+X666TX)- %K %
 %
 
 
 
 
 
 
 & 	8+?"&"7"7$(d5H$I$I!-	 #8 # # %< $) 3#'#O#O$ $%
 %
 %
 
 
 
 
 
 
 "##q((A(,!1        I
 O""r|   c           
      &  K   | j         dk    rF| j         dk    r;| j         dk    r0| j         dk    r%| j         dk    r|                     |||||          S 	 t          |          }|                     |||||           d{V }||j        }|j        }|                     ||||||	           d{V }t          |t                    r|S t          j	                    }	| j         dk    r,| j
        %| j
                            ||||
           d{V }
n| j         dk    r,| j        %| j                            ||||
           d{V }
n| j         dk    r-| j        &| j                            |||||           d{V }
nT| j         dk    rt          | ||          S | j         dk    r*| j        #| j                            ||           d{V }
nd}
|
t!          | ||           d{V }|t#          j        d| d|                     |
           d|            t          j	                    }||	z
  }t)          j        | j                            t0          j        |d||	|                     |
S # t4          $ r}t7          j                    }|o|                    dd          }|Wt=          j        |j         ||f          !                                 t)          j        |"                    ||                     |d}~ww xY w)z
        Async implementation of 'get_available_deployments'.

        Allows all cache calls to be made async => 10x perf impact (8rps -> 100 rps).
        r   r   r   r   r   r  r   r  r~  r  r  N)r   r  r~  r  r  r  r  r   r~  r  )r  r   r~  r  r  r  r   r   r  r   r  $get_available_deployment for model: , Selected deployment:  for model: z2<routing_strategy>.async_get_available_deployments)r  r  rE  r  r  r  r  r  )#r   r  r   async_pre_routing_hookr   r~  r  r   r  r  r2  async_get_available_deploymentsr6  r4  r)   r   r=   r   r  r}  r  r  r   r  r^   r  ry  r  r  r'  r  r  r  r  r  )r  r   r  r~  r  r  r  pre_routing_hook_responser   r  rs  r  r  r  r|  r  r  s                    r}   r   z%Router.async_get_available_deployment[  s      !%===%)999%)===%)@@@%5500!$7- 1   	@PP /3.I.I-!$7 /J / / ) ) ) ) ) )% )4174= )-(J(J-!$7!1 )K ) ) # # # # # # -t44 +**J%)AAA,8 2RR$),?!)#	 S         
 %)===*6 0PP$),?!)#	 Q         
 %)@@@-9 3SS$),?!)#'5 T         
 &*:::%(,(;    %55)5 /OO$),? P         
 "
!"E,0%5# # #      	
  !& Lu  L  LUYUjUjkuUvUv  L  L  EJ  L  L   y{{H :-I'BB(/&R%5)% C  	 	 	  	 	 	"+"6"8"8),001FMM*$*:!45   eggg'#99!=PQQ   G!	s(   A6I; CI; "CI; ;
LBLLc                 r   K   || j         v r+| j         |                             |||||           d{V S dS )z
        This hook is called before the routing decision is made.

        Used for the litellm auto-router to modify the request before the routing decision is made.
        r!  N)r   r(  )r  r   r  r~  r  r  s         r}   r(  zRouter.async_pre_routing_hook  sp        D%%%*51HH-!$7 I          tr|   c                    |                      ||||          \  }}t          |t                    r|S t          |          }t	          | |          }|                     ||          }| j        r||                     ||||          }t          |          dk    r[| 	                    |          }	| j
                            |	|          }
t	          | |          }t          ||
| j        |	          | j        d
k    r$| j        | j                            ||          }n| j        dk    rt!          | ||          S | j        dk    r%| j        | j                            |||          }nd| j        dk    r&| j        | j                            ||||          }n3| j        dk    r&| j        | j                            ||||          }nd}|st)          j        d| d           | 	                    |          }	| j
                            |	|          }
t	          | |          }t          ||
| j        |	          t)          j        d| d|                     |           d|            |S )zB
        Returns the deployment based on routing strategy
        )r   r~  r  r  rH  r  Nr  r   r  )	model_idsr  )r   r   r   cooldown_listr   r$  r   r#  r   )r  r   r  r   r"  r   r%  z, No deployment availabler&  r'  )r  r   r  r   r7   r  r   r  rk  r  r   get_min_cooldownr[   r   r   get_available_deploymentsr)   r4  r   r2  r   r  r}  )r  r   r~  r  r  r  r   r  r  r-  _cooldown_time_cooldown_listrs  s                r}   r  zRouter.get_available_deployment  s    &*%M%M 3	 &N &
 &
"" )400 	'&&+L,
 ,
  9$(;K 
  
  
 #?? 3!5 @ 
 
 & 	8+?"&"7"7$7!-	 #8 # # "##q((**e*<<I!0AA#6F B  N 7(,?O  N ','+'B,	     L00T5J5V.HH!7J I  JJ "&666 "$($7    !%<<<)52LL!$7- M  JJ !%:::%1.HH!$7!	 I  JJ !%===(41KK!$7!	 L  JJ J!&WuWWW   **e*<<I!0AA#6F B  N 7(,?O  N ','+'B,	    	" H5  H  HQUQfQfgqQrQr  H  H  AF  H  H	
 	
 	
 r|   r  c                 l    t          j        d|            t          |          fd|D             S )a  
        Filters out the deployments currently cooling down from the list of healthy deployments

        Args:
            healthy_deployments: List of healthy deployments
            cooldown_deployments: List of model_ids cooling down. cooldown_deployments is a list of model_id's cooling down, cooldown_deployments = ["16700539-b3cd-42f4-b426-6a12a1bb706a", "16700539-b3cd-42f4-b426-7899"]

        Returns:
            List of healthy deployments
        zcooldown deployments: c                 4    g | ]}|d          d         v|S )r   r"  r{   )rJ  rs  cooldown_sets     r}   rO  z7Router._filter_cooldown_deployments.<locals>.<listcomp>  s8     
 
 
%,'-\AA AAAr|   )r   rz  r  )r  r   r  r5  s      @r}   r  z#Router._filter_cooldown_deployments  sY     	#$S=Q$S$STTT/00
 
 
 
)<
 
 
 	
r|   c                    	 |                     di                                dd          }|||                     ||           dS dS dS # t          $ r/}t          j        dt          |                      Y d}~dS d}~ww xY w)z7
        Tracks successful requests rpm usage.
        r   r"  Nz$Error in _track_deployment_metrics: )r'  r  ry  r   r  r   )r  rs  r  r  r?  r|  s         r}   r  z Router._track_deployment_metrics  s    		Y!~~lB77;;D$GGH'&& "2      ''  	Y 	Y 	Y!'(WsSTvv(W(WXXXXXXXXX	Ys   AA 
B$B  Br  c                 <    t          ||| j        | j                  S )N)r  r  r   r   )"_get_num_retries_from_retry_policyr   r   )r  r  r  s      r}   r<   z(Router.get_num_retries_from_retry_policy  s-     2#%)%B*	
 
 
 	
r|   c                    | j         }|dS t          |t          j                  r|j        |j        S t          |t          j                  r|j        |j        S t          |t          j                  r|j        |j        S t          |t          j	                  r|j
        |j
        S t          |t          j                  r|j        	|j        S dS dS )a  
        BadRequestErrorRetries: Optional[int] = None
        AuthenticationErrorRetries: Optional[int] = None
        TimeoutErrorRetries: Optional[int] = None
        RateLimitErrorRetries: Optional[int] = None
        ContentPolicyViolationErrorRetries: Optional[int] = None
        N)r   r   r   r  BadRequestErrorAllowedFailsr}  AuthenticationErrorAllowedFailsr
  TimeoutErrorAllowedFailsry  RateLimitErrorAllowedFailsr  'ContentPolicyViolationErrorAllowedFails)r  r  r   s      r}   get_allowed_fails_from_policyz$Router.get_allowed_fails_from_policy  s     >B=V'4 y'"9::	D$@L'CCy'"=>>	H$DP'GGy'/22	A$=I'@@y'"899	C$?K'BBy'"EFF	P$LX'OO		P 	PXXr|   c                     ddl m} | j        d S | j        } ||j        dg|j                  }|| _        t          j                            |           t          j        	                    |j
                   t          j        d           d S )Nr   )SlackAlertingslack)alerting_thresholdalertingdefault_webhook_urlz2[94m
Initialized Alerting for litellm.Router[0m
)1litellm.integrations.SlackAlerting.slack_alertingrA  r   rC  webhook_urlslack_alerting_loggerr   r   r/  r  !response_taking_too_long_callbackr   r  )r  rA  router_alerting_config_slack_alerting_loggers       r}   r  zRouter._initialize_alerting  s    SSSSSS'F151E!.5HY 6 B"
 "
 "
 &<"(==>TUUU(EE"D	
 	
 	
 	"H	
 	
 	
 	
 	
r|   CustomRoutingStrategyc                 ^    t          | d|j                   t          | d|j                   dS )a  
        Sets get_available_deployment and async_get_available_deployment on an instanced of litellm.Router

        Use this to set your custom routing strategy

        Args:
            CustomRoutingStrategy: litellm.router.CustomRoutingStrategyBase
        r  r   N)r  r  r   )r  rL  s     r}   set_custom_routing_strategyz"Router.set_custom_routing_strategy  sL     	&!:	
 	
 	

 	,!@	
 	
 	
 	
 	
r|   c                 P    d t           _        | j                                         d S r8  )r   r   flush_cacherB  s    r}   rP  zRouter.flush_cache  s#    
     r|   c                     g t           _        g t           _        g t           _        g t           _        d | _        |                                  d S r8  )r   r   r   r  r  r   rP  rB  s    r}   resetzRouter.reset  sB    #% *,'#% *,' r|   )F)r   r8  )FFF)T)r/  r8  )NNNN)NNNNN)NF)NNN)NNFN)NNF)rw   rx   ry   r   r   __annotations__r   r   rr  r   r   tenacityr   r$   r   r'   r   r   r   r	   r   rY   rR   r
   r   rM   r  rT  floatr   rZ   rW   rL   rV   r_   rK   r  r  r$  staticmethodr   r   r   r)  r]   r   r  r  r   r  r}  rg   re   r  r  r   rE   r  r  r  r  r  rQ   r*  r  r  r6  r9  r0  rW  r[  r]  r  r   r  r  r  r  r  r  r  rF   r  r  r  r  r  r  r  r  r  r  r  r  r  rf   r  r  r  r  rG   r  r  r`   r  r  r  r%  r.  r9  r6  r5  r   r4  ry  r  r!   wrapr  rU  rV  rf  re  rg  r  rJ  ri  r   r  r  r  r  rh  rq   r  r  r  r  r  r  r  r	  r  r%  r  rS   r	  r  r   r   r   r  r.  rH  rK  r  r&  rS  r  rW  rZ  r  r  rd  rl  ra   rs  rU   r  r  r  r   r   r  r  rv  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   rs   r(  r  r  r  r<   r?  r  rP   rN  rP  rR  r{   r|   r}   r   r      s&        K&+OXd^+++&1111H:>h67>>>:>h67>>>MQeL(C,G&H!IJQQQ ;?#'$($((,*/ ,0*.%) #'*. 7;!06 )+)+ ',%*     ,0 DH&(DH48 "!##+0[j& j&$*+T$sCx.-AAB
j& $$78j& C=j& SMj& SMj& !j& "$j& j& !K
j&" #j&& #5/'j&( #3-)j&, c]-j&.  
/j&4 %5j&6 !7j&8 !)!
9j&> (0}?j&@ Aj&B _-Cj&D $I
Ej&J Kj&L #'Mj&N #'Oj&P $eC!::;;<
Qj&V !%Wj&X #Yj&Z [j&\ +t#$
]j&b #'#
cj&h  
ij&n '
oj&t  
uj&z $D>{j&| "&
}j&L #++@"AMj&N  $Oj&P !))@ AQj&R ".1Sj&T "*!"
Uj&Z %)[j&\ 
]j& j& j& j&X       D 	.38n	.	z,,	-	. 	. 	. \	.+ + + + +; %os&: ;;TX; ; ; ;z	F 	F 	Fh
 h
 h
T$    U(01F(GU U U U,4    ($(c3h$8	}11	2   &AA$(c3h$8A	}11	2A A A AJ $()9$:DKDM	   X
 UZ $()9$:DKEN	   X
 kp $()9$:DI'RV-Y`afYgJgDh	"M1	2   X 	3 33 '(3 	3 3 3 3jB>+B> tCH~&B> 	B>
 
B> B> B> B>HPP$(c3h$8P	}12	3P P P Pl 1;	
 

 
 !)	

 

 
 
 
, EOP PP4<SMP	P P P P, NR' ''(,'=Ec]'	' ' ' 'H (,	+
 +
+
 +
  }	+

 
+
 +
 +
 +
Z t    4

"&
	%s
#	$
 
 
 
"&	%s
#	$   $	4 	t 	uczAR8S 	 	 	 	Z#S	Z# T#s(^,d4S#X3G.HHIZ# Z# Z# Z#x++$(.>)?$@+ + + +^ $(c3h$8BI$-	   X SX $(c3h$8BI%.	   X 	M, M,M, tCH~&M, 	M, M, M, M,f di $()9$:FISZ[`Sa	   X
 $()9$:FISZ[_S`	   X < << '(< 	< < < <|;; ; $	;
 CHo; S#X; ; ; ;z     $ZBZB '(ZB S#X	ZB ZB ZB ZBxs 3    , ,C , , , ,\c #    *Fs F3 F F F FP( (3 ( ( ( (TD) DC D D D DLT3 Ts T3 T T T Tl3    *)C ) ) ) )^ $)&+#(# ## # 4.	#
 d^# 4.# # # #R $)&+#(# ## # 4.	#
 d^# 4.# # # #JDS D# D D D DT $)&+#(   4.	
 d^ 4.   :DS D D D D DL-5   >LL5=L L L L\;;-5; ; ; ;B $)	  S$Y 4.	 
   $6c4i 0 6 6 6 6 6x $(	  S$Y 4.	 
   2DuS$Y'7 D D D D DN 
	   0BB 
	B B B BH 
	   >NN 
	N N N Nd  $v v}v 
	v v v vp// / / /l .21 1#1 &c]1 1 1 1r ? # #
   H .21 1#1 &c]1 1 1 1
#
 
 
 
2 .2(,	
 
#
 &c]
 %	
 
 
 
,}!}! $D>}! D>	}!
 #+4.}! #+4.}! c]}! }! }! }! }! }!~ V[]]0 0 ]0j &*$(3737- -- c]- D>	-
 #+4.- #+4.- - - -^ V[]]J% J% ]J%X    $ :>! !!)1#! ! ! !L /3*.3737,0@ @@ &d^@ "$	@
 #+4.@ #+4.@ $D>@ @ @ @DW W W &*$ $S$s)^,-$ c]$ 
$s)		$ $ $ $< /3*./ // / 	/
 &d^/ "$/ 
sEz	/ / / /f@ @ @D! 
#! ! ! !FJ 
J J J JX 
+3C= 
  
  
  
DRR	/	0R R R R # # #t # # # #J 4<TN	   6    $$$1$;?$	$ $ $ $L5c 5Xd^ 5 5 5 5.55,4TN5	tDz4:%	&5 5 5 5<54 5 5 5 5( 15	; ;; #4.; n-	; ; ; ;F *.041, 1,1, "$Z1, 4 012	1,
 #4.1, !1, n-1, 1, 1, 1,f'c '4 ' ' ' '8KK K 	K
 K 
*	K K K KZ D    -@j -@ -@ -@ -@^&z &d & & & &P06 06 06 06 06ds* s s s s sj:$:;>:GJ: : : :x 8L    6@@*-@	@ @ @ @& 6:J JJ%-c]J	J J J J4/J /8J;O / / / /bC HZ,@    4s x
/C    &	(3 	(8D> 	( 	( 	( 	( #	*	   $ EI 58>B	   X
 58>A	   X !	P PTNP !P SM	P
 
P P P Pd $    
:# 
:(4. 
: 
: 
: 
:66),6	)	6 6 6 6pf f >Af 	.	!f  f  f  f P
 
8P 
 
 
 
>G$G$	x}hsm+	,G$ G$ G$ G$R Y122266	.	!6 6 6 326 cSVh    B ;?" ""*23-"	" " " "HN$ N4 N N N N R$ R R R R4 MR "3-EI	c   (E E E E E Ec C HSM    & FJ &*5=c]	   & &*!%	0 00 c]0 #	0
 
!	"0 0 0 0d x} S	    0 IM -8@	#   *
>(4. 
>T 
> 
> 
> 
> +/ "3-	!	"   @ JN* *"3-*9A#*	$*+	,* * * *\ %),0!%	   SM  %SM  #	 
 
c49n	       D"%	   .# # #B1W 1W 1Wf4 4 4 4v *.|% |%|% "|% tCH~&	|%
 !|% |% |% |%|3 8C=    &Uc Ud U U U U 48,0.3)-h* h*h* 4S#X/0h* c4i()	h*
 &d^h* !h* 
sE$*%%	&h* h* h* h*\ 48,0.3+/U# U#U# U# 4S#X/0	U#
 c4i()U# &d^U# #4.U# 
tDz4	 U# U# U# U#v 48,0.3Z ZZ Z 4S#X/0	Z
 c4i()Z &d^Z Z Z Z@ 48,0.3   4S#X/0	
 c4i() &d^ 
(	)   < 48,0.3)-{ {{ 4S#X/0{ c4i()	{
 &d^{ !{ { { {z
#':
EI#Y
	d
 
 
 
, FJY Y,4TNY Y Y Y$ BF
 
"
19#
 
 
 
&Py &P &P &P &PP
 
 
0
%>
 
 
 
.! ! !    r|   r   )r  rw  enumr  rt  r  r   r  r  r  r  r   	functoolsr   typingr   r   r   r	   r
   r   r   r   r   r   r   httpxr   r   pydanticr   typing_extensionsr   r   litellm.litellm_core_utils2litellm.litellm_core_utils.exception_mapping_utilsr   litellm._loggingr   litellm._uuidr   litellm.caching.cachingr   r   r   r   litellm.constantsr   "litellm.integrations.custom_loggerr   #litellm.litellm_core_utils.asyncifyr   r  r   ,litellm.litellm_core_utils.coroutine_checkerr   .litellm.litellm_core_utils.credential_accessorr    %litellm.litellm_core_utils.dd_tracingr!   *litellm.litellm_core_utils.litellm_loggingr"   r  &litellm.router_strategy.budget_limiterr#   "litellm.router_strategy.least_busyr$   #litellm.router_strategy.lowest_costr%   &litellm.router_strategy.lowest_latencyr&   &litellm.router_strategy.lowest_tpm_rpmr'   )litellm.router_strategy.lowest_tpm_rpm_v2r(   &litellm.router_strategy.simple_shuffler)   )litellm.router_strategy.tag_based_routingr*   /litellm.router_utils.add_retry_fallback_headersr+   r,    litellm.router_utils.batch_utilsr-   r.   r/   /litellm.router_utils.client_initalization_utilsr0   2litellm.router_utils.clientside_credential_handlerr1   r2   #litellm.router_utils.cooldown_cacher3   &litellm.router_utils.cooldown_handlersr4   r5   r6   r7   r8   ,litellm.router_utils.fallback_event_handlersr9   r:   r;   *litellm.router_utils.get_retry_from_policyr<   r8  !litellm.router_utils.handle_errorr=   r>   Dlitellm.router_utils.pre_call_checks.prompt_caching_deployment_checkr?   Clitellm.router_utils.pre_call_checks.responses_api_deployment_checkr@   >litellm.router_utils.router_callbacks.track_deployment_metricsrA   rB   litellm.schedulerrC   rD   litellm.types.llms.openairE   rF   rG   rH   litellm.types.routerrI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   litellm.types.servicesr^   litellm.types.utilsr_   r`   ra   r  rb   rc   rd   ro  re   rf   rg   rh   ri   rj   rk   rl   rm   rn   &router_utils.pattern_match_deploymentsrp   opentelemetry.tracerq   _Spanr  rr   rs   Enumru   r   r{   r|   r}   <module>r     s                   # # # # # #                                              & & & & & &  ! ! ! ! 9 9 9 9 " " " " " " 2 2 2 2 2 2                  9 8 8 8 8 8 ; ; ; ; ; ; B B B B B B U U U U U U J J J J J J M M M M M M 8 8 8 8 8 8 P P P P P P G G G G G G F F F F F F H H H H H H N N N N N N J J J J J J P P P P P P A A A A A A M M M M M M                
 R Q Q Q Q Q        > = = = = =                      
                              2 1 1 1 1 1 1 1                                                        . 0 / / / / / E E E E E E E E ) ) ) ) ) ) 9 9 9 9 9 9 R R R R R R R R R R                        G F F F F F !111111       
 DDDJ     $)   jq jq jq jq jq jq jq jq jq jqr|   