o
    ưi                     @   sh  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZ d dlm Z  d dl!m"Z" d dl#Z#d dl$Z#d dl%Z#d dl#m&Z& d d	l'm(Z( d d
l)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> d dl?m@ZA d dlBmCZC d dlDmEZE d dlFmGZG d dlHmIZI d dlJmKZK d dlLmMZM d dlNmOZO d dlPmQZQ d dlRmSZS d dlTmUZU d dlVmWZWmXZX d dlYmZZZm[Z[m\Z\ d d l]m^Z^ d d!l_m`Z`maZa d d"lbmcZcmdZd d d#lemfZf d d$lgmhZhmiZimjZjmkZkmlZl d d%lmmnZnmoZompZp d d&lqmrZs d d'ltmuZumvZv d d(lwmxZx d d)lymzZz d d*l{m|Z| d d+l}m~Z~mZ d d,lmZmZ d d-lmZmZmZmZ d d.lmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d d/lmZ d d0lmZmZmZ d d1lmZ d d1lmZ d d2lmZmZmZ d d3lmZmZmZmZmZmZmZmZmZmZ d4d5lmZ erd d6lmZ d d7lmZmZ d d8lmZ eeef ZneZeZeZeZG d9d: d:ejZG d;d< d<ZdS )=    Ndefaultdict)	lru_cache)TYPE_CHECKINGAnyAsyncGeneratorCallableDict	GeneratorListLiteralOptionalTupleUnioncast)AsyncOpenAI)	BaseModel)overload)get_secret_str)verbose_router_logger)uuid)	DualCacheInMemoryCache
RedisCacheRedisClusterCache)DEFAULT_MAX_LRU_CACHE_SIZE)CustomLogger)run_async_function)!_get_parent_otel_span_from_kwargs&get_metadata_variable_name_from_kwargs)coroutine_checker)CredentialAccessor)tracer)Logging)SensitiveDataMasker)JSONProviderRegistry)RouterBudgetLimiting)LeastBusyLoggingHandler)LowestCostLoggingHandler)LowestLatencyLoggingHandler)LowestTPMLoggingHandler)LowestTPMLoggingHandler_v2simple_shuffle)get_deployments_for_tag) add_fallback_headers_to_responseadd_retry_headers_to_response)"_get_router_metadata_variable_namereplace_model_in_jsonlshould_replace_model_in_jsonl)InitalizeCachedClient)get_dynamic_litellm_paramsis_clientside_credential)filter_team_based_modelsfilter_web_search_deployments)CooldownCache)DEFAULT_COOLDOWN_TIME_SECONDS_async_get_cooldown_deployments/_async_get_cooldown_deployments_with_debug_info_get_cooldown_deployments_set_cooldown_deployments)#_check_non_standard_fallback_formatget_fallback_model_grouprun_async_fallback)!get_num_retries_from_retry_policy)#async_raise_no_deployment_exceptionsend_llm_exception_alert)DeploymentAffinityCheck)ModelRateLimitingCheck)PromptCachingDeploymentCheck)0increment_deployment_failures_for_current_minute1increment_deployment_successes_for_current_minute)FlowItem	Scheduler)AllMessageValues	FileTypesOpenAIFileObjectOpenAIFilesPurpose)#CONFIGURABLE_CLIENTSIDE_AUTH_PARAMSVALID_LITELLM_ENVIRONMENTSAlertingConfigAllowedFailsPolicyAssistantsTypedDictCredentialLiteLLMParamsCustomRoutingStrategyBase
DeploymentDeploymentTypedDictGuardrailTypedDictLiteLLM_ParamsMockRouterTestingParamsModelGroupInfoOptionalPreCallChecksRetryPolicyRouterCacheEnumRouterGeneralSettingsRouterModelGroupAliasItemRouterRateLimitErrorRouterRateLimitErrorBasicRoutingStrategySearchToolTypedDict)ServiceTypes)CustomPricingLiteLLMParamsGenericBudgetConfigTypeLiteLLMBatch)	ModelInfo)ModelResponseStreamStandardLoggingPayloadUsage)
CustomStreamWrapperEmbeddingResponseModelResponseRulesfunction_setupget_llm_provider!get_non_default_completion_params
get_secretget_utc_datetimeis_region_allowed   )PatternMatchRouter)Span)
AutoRouterPreRoutingHookResponseComplexityRouterc                   @   s   e Zd ZdZdS )RoutingArgs<   N)__name__
__module____qualname__ttl r   r   E/home/app/Keep/.python/lib/python3.10/site-packages/litellm/router.pyr      s    r   c                _   @   s  e Zd ZU e Zeed< dZee ed< dZ	e
ed< dZdZee ed< dZee ed< dZeeeeeef   ed	< ddddddddddi dddddddddddd
dg g g i dddddi ddddddi dde ddf.deeee eeeef  f  dee deee  deee  dee dee dee
 dee dee
 dee dedeee  de
dee dee
 dee
 dee
 dee dee d ee d!ee
 d"ed#e d$ d%eee  d&ed'ed(ed)eeeeee!f f  d*ed+ed,ed-e
d.eee"ef  d/eee"f d0ee
 d1ee# d2ee d3ee d4e d5 d6ee$ d7ed8ee% d9ee& d:ee d;e
d<ed=df^d>d?Z'e(d=ee fd@dAZ)dBdC Z*dDdE Z+e(dFeeef d=ee,e-f fdGdHZ.dIe,fdJdKZ/d4ee0ef d7efdLdMZ1dNdO Z2dPdQ Z3dRdS Z4dTdU Z5dVdW Z6dXdY Z7dZd[ Z8d\d] Z9d^d_ Z:d`da Z;dbdc Z<ddde Z=dfee fdgdhZ>d6ee$ fdidjZ?dkefdldmZ@dnedoeeeef  d=eeAeBf fdpdqZCdnedoeeeef  d=eeAeBf fdrdsZDd=efdtduZEdvedoee fdwdxZFeGdnedoeeH dye d d=eBfdzd{ZIeG	d	dnedoeeH dye d d=eAfd|d{ZIeG	d	dnedoeeH dyee d e d f d=eeBeAf fd}d{ZI	d	dnedoeeH dyefd~d{ZIe(deJdeeK d=dfddZLdeBdoeeeef  ded=eBfddZMdeBdoeeeef  ded=eBfddZNdvedoee fddZOdnedoeeeef  d=eeAeBf fddZP	d
dnededee d=dfddZQdeRdked=dfddZS	d
dedee d=dfddZT	ddkededee d=eUfddZVe(dkeded=dfddZW	ddkededee d=dfddZXdkedefddZYdeded=eeee
f  fddZZdeded=eeee
f  fddZ[deded=eeee
f  fddZ\dee doeeeeef  eeeeef   f fddZ]dnedoeeeH  fddZ^eGdnedoeeeef  dye d d=eBfddZ_eG	d	dnedoeeeef  dye d d=eAfddZ_	d	dnedoeeeef  dyefddZ_eG	d	dnedoeeH de
dye d d=eAf
ddZ`eGdnedoeeH de
dye d d=eBf
ddZ`	d	dnedoeeH de
fddZ`dnede
dedeaedf deeef f
ddZbdned=efddZcdnedoeeH deeef fddZddednefddZedednefddZfdednefddĄZgdednefddƄZhdeidnefddɄZjdeidnefdd˄ZkdnededefddτZldnefddфZmdnefddӄZn			ddnededee dee dee f
dd؄Zo			ddnededee dee dee f
ddڄZpdnedefdd܄Zq			ddednedee dee dee f
dd߄ZrdednefddZsdefddZtdnedefddZudedefddZvdnedefddZwded=dfddZxdnedefddZydeeef dneded=eeef fddZzdnedefddZ{dnedefddZ|	d	dnedeeef dee d=e}fddZ~deeef dnefddZ	ddnedeeef dee d=e}fddZdeeef dnefddZdned=efd dZdned=efddZdneedf fddZdd Zdned=efdd	Zdned=efd
dZ	ddnee d=efddZdned=efddZdned=efddZdnefddZ	ddedee fddZ	ddede d fddZ	ddedee fddZ	ddedee fddZdefd d!Z	ddedee fd"d#Z		ddedee d$ee fd%d&Zd'eRd(ee d&ee d'ee d(ee d)ee dedefd*d+Ze d,d- Z				dded)ee d&ee d'ee d(ee f
d.d/Ze d0d1 Zdefd2d3Z	dded)ee fd4d5Z					dd6eRd7ee d8ee d'ee d(ee d9ee fd:d;Zd<d= Z	dd&eeeee f  d)ee d=eee  fd>d?Zd=ee fd@dAZ		dd'eRdBe
de
d7ee d8ee d=ee
ef fdCdDZdEdF Zd=ee fdGdHZd=efdIdJZdKee fdLdMZded=e dN fdOdPZded'eRd=efdQdRZdSedTee d=e
fdUdVZd=efdWdXZdnedYeAded=efdZd[ZdnedTee fd\d]ZdnedTee d=eaee ee f fd^d_Zdkefd`daZ	ddkedTee dbee fdcddZ		ddned7ee doeeeH  dTee deee dbee fdfdgZd)edhefdidjZdkedledmedned=eeU f
dodpZdhed=efdqdrZdkeUfdsdtZdhed=efdudvZdkeUfdwdxZdkeUd=efdydzZdefd{d|ZdkeUd=eUfd}d~ZdkeUdednefddZdkeUd=eeU fddZdede
d=dfddZ	ddnedee d=dfddZdkeUd=eeU fddZded=eeU fddZÐded=eeU fddZĐded=ee fddZŐded=eeU fddZƐded=eeeef  fddZeG	ddkeedf deddd=efddZeGdkddeded=efddZ	ddkeeedf  dedee d=efddZɐded=ee fddZʐded=ee fddZːdeded=ee fddZ͐d)eded=ee fddZϐd)ed=ee fddZАd)ed=eaee
 ee
 f fddZeeӐdd)ed=ee fddZԐd)ed=eee
f fddZ	ddYed)ee d=efddZded=dfddZdefddZ	ddee ded=ee fddZِded=efddZڐdee d=ee fddZېdeded=ee fddZ	ddednedee d=efddZ		ddedee dee d=ee fdĐdńZސddee d=ee fdƐdǄZ	ddkedee d=ee fdȐdɄZdee d=efdːd̄Z	ddee d=ee fd͐d΄Z	ddee dee d=eee  fdϐdЄZddѐd҄ZddӐdԄZ			ddee dee dee d=eeee f fd֐dׄZded=efdؐdلZdڐdۄ Zdܐd݄ Zddސd߄Z	ddned7edoeeeef  deee fddZdned=ee fddZdned=efddZ				ddnedoeeeeef   deeeef  dee deee d=eaeeeef f fddZ				ddnedeedoeeeeef   deeeef  dee dTee d=eee ef fddZ			ddnedeedoeeeeef   deeeef  dee f
ddZ			ddnedeedoeeeeef   deeeef  dee f
ddZ			ddnedeedoeeeeef   deeeef  dee d=ee fddZ				ddnedoeeeeef   deeeef  dee deee f
ddZ				ddnedoeeeeef   deeeef  dee deee f
ddZd7ee dee d=ee fddZd7ee d=ee fddZ	ddTee fddZ	ddeRd)ee fddZdeRfddZd d ZdefddZdd Zdd ZdS (  Routermodel_namesFcache_responsesi  default_cache_time_secondsNleastbusy_loggerlowesttpm_loggeroptional_callbacksINFOTr   simple-shuffle
model_listassistants_configsearch_toolsguardrail_list	redis_url
redis_host
redis_portredis_passwordredis_dbcache_kwargscaching_groups
client_ttlpolling_intervaldefault_prioritynum_retriesmax_fallbackstimeoutstream_timeoutdefault_litellm_paramsdefault_max_parallel_requestsset_verbosedebug_level)DEBUGr   default_fallbacks	fallbackscontext_window_fallbackscontent_policy_fallbacksmodel_group_aliasenable_pre_call_checksenable_tag_filteringtag_filtering_match_anyretry_afterretry_policymodel_group_retry_policyallowed_failsallowed_fails_policycooldown_timedisable_cooldownsrouting_strategy)r   
least-busyusage-based-routinglatency-based-routingcost-based-routingusage-based-routing-v2optional_pre_call_checksrouting_strategy_argsprovider_budget_configalerting_configrouter_general_settingsdeployment_affinity_ttl_secondsignore_invalid_deploymentsreturnc/           6      C   s"  || _ |.| _|| _|| _|| _|| _ddlm}/ |/ | _dt	_
| j du r9|dkr/ttj n
|dkr9ttj |,p=t | _|| _|pEg | _|pJg | _g | _i | _d}0d}1i }2|| _|dusg|dur|durd}0|durq||2d	< |dury||2d
< |durt||2d< |dur||2d< |	durtd t|	|2d< |2| | |2}1|
rt	jdu rt	jd"d|0i|2t	_|
| _t|1t  d| _t!||1d| _"|| _#d| _$|| _%g | _&t' | _(i | _)i | _*i | _+|pi | _,i | _-i | _.|dur| /| | j0| _1|D ]}3d|3d v rd| j|3d d < qng | _0d| _2|#dur|#| _3nt	j3| _3|%p't4| _5t6| j| j5d| _7|&| _8t  | _9|durB|| _:nt	j:durMt	j:| _:nt;j<| _:|durZ|| _=nt	j=duret	j=| _=nt	j>| _=|pnt	j?| _@|| _A| | _B|'| _C|p~t	jD}4| jE|4d |4| _D|dust	jFdur|pt	jF}4| jDdur| jDGd|4i nd|4ig| _D|pt	jH| _H|pt	jI}5| jE|5d |5| _ItJtK| _LtJtK| _MtJtK| _Ng | _O|pi }t	jP|| d| _Q|| _R| jRSd| | jRSdd | jRSdi d|i i | _T	 | jU|'|)d d| _VtWt	jXtYrt	jZ[| j\ nt	jZ[| j\ tWt	j]tYr3t	jZ^| j_ n| j_gt	_]tWt	j`tYrGt	jZa| jb n| jbgt	_`tWt	jctYr[t	jZd| je n| jegt	_c|)| _f|*| _g|-| _hd| _itjjk|| jgdr|(dur|(Gd ndg}(d| _l|!durtW|!tmrtnd"i |!| _ln	tW|!tnr|!| _l| jldurtodp| jljqdd  |"| _rd| _s|$durtW|$tmrttd"i |$| _sn	tW|$ttr|$| _s| jsdurtod!p| jsjqdd  |+| _u|(dur| v|( | judur| w  | x  | y  | z  dS )#a  
        Initialize the Router class with the given parameters for caching, reliability, and routing strategy.

        Args:
            model_list (Optional[list]): List of models to be used. Defaults to None.
            redis_url (Optional[str]): URL of the Redis server. Defaults to None.
            redis_host (Optional[str]): Hostname of the Redis server. Defaults to None.
            redis_port (Optional[int]): Port of the Redis server. Defaults to None.
            redis_password (Optional[str]): Password of the Redis server. Defaults to None.
            cache_responses (Optional[bool]): Flag to enable caching of responses. Defaults to False.
            cache_kwargs (dict): Additional kwargs to pass to RedisCache. Defaults to {}.
            caching_groups (Optional[List[tuple]]): List of model groups for caching across model groups. Defaults to None.
            client_ttl (int): Time-to-live for cached clients in seconds. Defaults to 3600.
            polling_interval: (Optional[float]): frequency of polling queue. Only for '.scheduler_acompletion()'. Default is 3ms.
            default_priority: (Optional[int]): the default priority for a request. Only for '.scheduler_acompletion()'. Default is None.
            num_retries (Optional[int]): Number of retries for failed requests. Defaults to 2.
            timeout (Optional[float]): Timeout for requests. Defaults to None.
            default_litellm_params (dict): Default parameters for Router.chat.completion.create. Defaults to {}.
            set_verbose (bool): Flag to set verbose mode. Defaults to False.
            debug_level (Literal["DEBUG", "INFO"]): Debug level for logging. Defaults to "INFO".
            fallbacks (List): List of fallback options. Defaults to [].
            context_window_fallbacks (List): List of context window fallback options. Defaults to [].
            enable_pre_call_checks (boolean): Filter out deployments which are outside context window limits for a given prompt
            model_group_alias (Optional[dict]): Alias for model groups. Defaults to {}.
            retry_after (int): Minimum time to wait before retrying a failed request. Defaults to 0.
            allowed_fails (Optional[int]): Number of allowed fails before adding to cooldown. Defaults to None.
            cooldown_time (float): Time to cooldown a deployment after failure in seconds. Defaults to 1.
            routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing", "cost-based-routing"]): Routing strategy. Defaults to "simple-shuffle".
            routing_strategy_args (dict): Additional args for latency-based routing. Defaults to {}.
            alerting_config (AlertingConfig): Slack alerting configuration. Defaults to None.
            provider_budget_config (ProviderBudgetConfig): Provider budget configuration. Use this to set llm_provider budget limits. example $100/day to OpenAI, $100/day to Azure, etc. Defaults to None.
            deployment_affinity_ttl_seconds (int): TTL for user-key -> deployment affinity mapping. Defaults to 3600.
            ignore_invalid_deployments (bool): Ignores invalid deployments, and continues with other deployments. Default is to raise an error.
        Returns:
            Router: An instance of the litellm.Router class.

        Example Usage:
        ```python
        from litellm import Router
        model_list = [
        {
            "model_name": "azure-gpt-3.5-turbo", # model alias
            "litellm_params": { # params for litellm completion/embedding call
                "model": "azure/<your-deployment-name-1>",
                "api_key": <your-api-key>,
                "api_version": <your-api-version>,
                "api_base": <your-api-base>
            },
        },
        {
            "model_name": "azure-gpt-3.5-turbo", # model alias
            "litellm_params": { # params for litellm completion/embedding call
                "model": "azure/<your-deployment-name-2>",
                "api_key": <your-api-key>,
                "api_version": <your-api-version>,
                "api_base": <your-api-base>
            },
        },
        {
            "model_name": "openai-gpt-3.5-turbo", # model alias
            "litellm_params": { # params for litellm completion/embedding call
                "model": "gpt-3.5-turbo",
                "api_key": <your-api-key>,
            },
        ]

        router = Router(model_list=model_list, fallbacks=[{"azure-gpt-3.5-turbo": "openai-gpt-3.5-turbo"}])
        ```
        r   )ServiceLoggingTr   r   localNZredisurlhostportpasswordzwDeprecated 'redis_db' argument used. Please remove 'redis_db' from your config/database and use 'cache_kwargs' instead.dbtype)redis_cacheZin_memory_cache)r   r   modellitellm_params)cacheZdefault_cooldown_time)fallback_param*)paramsZ
router_objr   max_retriesmetadatar   r   r   )r   r   router_budget_limitingz+[32mRouter Custom Retry Policy Set:
{}[0mZexclude_nonez3[32mRouter Custom Allowed Fails Policy Set:
{}[0mr   ){r   r   r   r   r   r   Zlitellm._service_loggerr   service_logger_objlitellmZsuppress_debug_infor   setLevelloggingr   r   r`   r   r   r   r   deployment_namesZdeployment_latency_mapr   strwarningupdate_create_redis_cacher   Cacher   r   r   rK   	schedulerr   default_deploymentr   provider_default_deployment_idsry   pattern_routerteam_pattern_routersauto_routerscomplexity_routersr    model_id_to_deployment_index_map model_name_to_deployment_indicesset_model_listr   healthy_deployments_access_groups_cacher   r:   r   r9   cooldown_cacher   Zfailed_callsr   openaiZDEFAULT_MAX_RETRIESr   ZROUTER_MAX_FALLBACKSrequest_timeoutr   r   r   r   r   validate_fallbacksr   appendr   r   r   inttotal_calls
fail_callssuccess_callsprevious_modelsZChatchatr   
setdefaultZdeployment_statsrouting_strategy_initaccess_groups
isinstance_async_success_callbacklistlogging_callback_managerZ"add_litellm_async_success_callbackdeployment_callback_on_successsuccess_callbackadd_litellm_success_callback#sync_deployment_callback_on_success_async_failure_callbackZ"add_litellm_async_failure_callback$async_deployment_callback_on_failurefailure_callbackZadd_litellm_failure_callbackdeployment_callback_on_failurer   r   r   Zrouter_budget_loggerr&   Z!should_init_router_budget_limiterr   dictr^   infoformat
model_dumpr   r   rS   r   add_optional_pre_call_checks_initialize_alertinginitialize_assistants_endpointinitialize_router_endpointsapply_default_settings)6selfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Z
cache_typer   cache_configmZ
_fallbacksZ_content_policy_fallbacksr   r   r   __init__   s   






















zRouter.__init__c                  C   s.   t tj} | j| j }d|v r|d |S )zS
        Returns a list of valid arguments for the Router.__init__ method.
        r  )inspectgetfullargspecr   r  args
kwonlyargsremove)Zarg_specZ
valid_argsr   r   r   get_valid_args  s
   
zRouter.get_valid_argsc                 C   s   g }|  | dS )z;
        Apply the default settings to the router.
        N)r  )r  Zdefault_pre_call_checksr   r   r   r    s   
zRouter.apply_default_settingsc                 C   s   t jt j|  t jt j|  t jt j|  t jt j|  t jt j|  t jt j|  t jt j	|  | j
durM| j
D ]}t jjt j	|dd q@dS dS )z
        Pseudo-destructor to be invoked to clean up global data structures when router is no longer used.
        For now, unhook router's callbacks from all lists
        NF)Zrequire_self)r   r   Z#remove_callback_from_list_by_objectr   r  r  r  input_callbackZservice_callback	callbacksr   )r  callbackr   r   r   discard  s8   

zRouter.discardr  c                 C   s^   |  d}|std}|durt|trt|}|r(tdi i | d|iS tdi | S )za
        Initializes either a RedisCache or RedisClusterCache based on the cache_config.
        startup_nodesZREDIS_CLUSTER_NODESNr   )getru   r   r   jsonloadsr   r   )r  r  Z_env_cluster_nodesr   r   r   r     s   


zRouter._create_redis_cacher   c                 C   s   | j jdu r|| j _dS dS )z
        Update the redis cache for the router, if none set.

        Allows proxy user to just do
        ```yaml
        litellm_settings:
            cache: true
        ```
        and caching to just work.
        N)r   r   )r  r   r   r   r   _update_redis_cache  s   zRouter._update_redis_cachec                 C   s  t d|  dgdd tD  }|d ur3t|to||v }t|t}|s3|s3td| d| d|tjjks>|tjkrit| j	d| _
ttjtrStj| j
 n| j
gt_ttjtrgtj| j
 d S d S |tjjkst|tjkrt| j	|d	| _ttjtrtj| j d S d S |tjjks|tjkrt| j	|d	| _ttjtrtj| j d S d S |tjjks|tjkrt| j	|d	| _ttjtrtj| j d S d S |tjjks|tjkrt| j	i d	| _ttjtrtj| j d S d S 	 d S )
NzRouting strategy: r   c                 S   s   g | ]}|j qS r   )value).0sr   r   r   
<listcomp>  s    z0Router.routing_strategy_init.<locals>.<listcomp>zInvalid routing_strategy: 'z'. Valid options: z. Check 'router_settings.routing_strategy' in your config.yaml or the 'routing_strategy' parameter if using the Router SDK directly.)router_cache)r(  routing_args)r   r	  rd   r   r   
ValueErrorZ
LEAST_BUSYr$  r'   r   r   r   r  r   r   r  r   add_litellm_callbackZUSAGE_BASED_ROUTINGr*   r   ZUSAGE_BASED_ROUTING_V2r+   lowesttpm_logger_v2ZLATENCY_BASEDr)   lowestlatency_loggerZ
COST_BASEDr(   lowestcost_logger)r  r   r   Zvalid_strategy_stringsZis_valid_stringZis_valid_enumr   r   r   r     sv   







zRouter.routing_strategy_initc                 C   st   |  tj| _|  tj| _|  tj| _|  tj| _|  tj| _|  tj| _|  tj| _|  tj	| _	d S N)
factory_functionr   Zacreate_assistantsZadelete_assistantZaget_assistantsZacreate_threadZaget_threadZa_add_messageZaget_messagesZarun_threadr  r   r   r   r  C  s   z%Router.initialize_assistants_endpointc                 C   s  | j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tj	dd| _	| j tj
d	d| _
| j tjd
d| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _dS )z+Helper to initialize core router endpoints.
moderation	call_typeanthropic_messagesagenerate_contentaadapter_generate_content
aresponsesafile_deleteafile_content	responsesaget_responsesacancel_responsesacompact_responsesadelete_responsesalist_input_items
_arealtime_aresponses_websocketacreate_fine_tuning_jobacancel_fine_tuning_jobalist_fine_tuning_jobsaretrieve_fine_tuning_jobalist_filesaimage_editallm_passthrough_routeN)r0  r   Zamoderationr5  Zaanthropic_messagesr6  r7  r8  r9  r:  r;  r<  r=  r>  r?  r@  rA  rB  rC  rD  rE  rF  Z
afile_listrH  rI  r1  r   r   r   _initialize_core_endpointsN  s   z!Router._initialize_core_endpointsc                 C   sH   ddl m}m}m} | j|dd| _| j|dd| _| j|dd| _dS )z"Initialize vector store endpoints.r   )asearchcreatesearchavector_store_searchr3  vector_store_searchvector_store_createN)litellm.vector_stores.mainrK  rL  rM  r0  rN  rO  rP  )r  rK  rL  rM  r   r   r   "_initialize_vector_store_endpoints  s   z)Router._initialize_vector_store_endpointsc                 C   sT  ddl m} ddl m} ddl m} ddl m} ddl m} ddl m} ddl m} dd	l m} dd
l m	}	 ddl m
}
 ddl m} ddl m} | j|dd| _| j|dd| _| j|dd| _| j|	dd| _| j|dd| _| j|
dd| _| j|dd| _| j|dd| _| j|dd| _| j|dd| _| j|dd| _| j|dd| _dS )z'Initialize vector store file endpoints.r   acreateadelete)alist)	aretrieve)aretrieve_content)aupdaterL  delete)r   )retrieve)retrieve_content)r   avector_store_file_creater3  vector_store_file_createavector_store_file_listvector_store_file_listavector_store_file_retrievevector_store_file_retrieveavector_store_file_contentvector_store_file_contentavector_store_file_updatevector_store_file_updateavector_store_file_deletevector_store_file_deleteN)Zlitellm.vector_store_files.mainrT  rV  rW  rX  rY  rZ  rL  r]  r   r^  r_  r   r0  r`  ra  rb  rc  rd  re  rf  rg  rh  ri  rj  rk  )r  Zavector_store_file_create_fnZavector_store_file_delete_fnZavector_store_file_list_fnZavector_store_file_retrieve_fnZavector_store_file_content_fnZavector_store_file_update_fnZvector_store_file_create_fnZvector_store_file_delete_fnZvector_store_file_list_fnZvector_store_file_retrieve_fnZvector_store_file_content_fnZvector_store_file_update_fnr   r   r   '_initialize_vector_store_file_endpoints  s`   z.Router._initialize_vector_store_file_endpointsc                 C   s\   ddl m}m}m}m} | j|dd| _| j|dd| _| j|dd| _| j|dd| _dS )	z"Initialize Google GenAI endpoints.r   )r6  agenerate_content_streamgenerate_contentgenerate_content_streamr6  r3  rn  rm  ro  N)Zlitellm.google_genair6  rm  rn  ro  r0  )r  r6  rm  rn  ro  r   r   r   "_initialize_google_genai_endpoints  s   z)Router._initialize_google_genai_endpointsc                 C   sd   ddl m}m} | j|dd| _| j|dd| _ddlm}m} | j|dd| _| j|dd| _d	S )
z$Initialize OCR and search endpoints.r   )aocrocrrq  r3  rr  rK  rM  rK  rM  N)Zlitellm.ocrrq  rr  r0  Zlitellm.searchrK  rM  )r  rq  rr  rK  rM  r   r   r    _initialize_ocr_search_endpoints  s   z'Router._initialize_ocr_search_endpointsc                 C   s   ddl m}m}m}m}m}m}m}m}m	}	m
}
 | j|dd| _| j|dd| _| j|dd| _| j|dd| _| j|dd| _| j|
d	d| _
| j|d
d| _| j|dd| _| j|dd| _| j|	dd| _	dS )zInitialize video endpoints.r   )
avideo_contentavideo_generationavideo_listavideo_remixavideo_statusvideo_contentvideo_generation
video_listvideo_remixvideo_statusrv  r3  r{  rw  r|  ry  r~  ru  rz  rx  r}  N)Zlitellm.videosru  rv  rw  rx  ry  rz  r{  r|  r}  r~  r0  )r  ru  rv  rw  rx  ry  rz  r{  r|  r}  r~  r   r   r   _initialize_video_endpoints  s2   0z"Router._initialize_video_endpointsc                 C   s   ddl m}m}m}m}m}m}m}m} ddl	m
}	 | j|dd| _| j|dd| _| j|dd| _| j|dd| _| j|d	d| _| j|d
d| _| j|dd| _| j|dd| _|	 D ]\}
}t| |
| j||
d q^dS )zInitialize container endpoints.r   )acreate_containeradelete_containeralist_containersaretrieve_containercreate_containerdelete_containerlist_containersretrieve_container)_generated_endpointsr  r3  r  r  r  r  r  r  r  N)Zlitellm.containersr  r  r  r  r  r  r  r  Z#litellm.containers.endpoint_factoryr  r0  itemssetattr)r  r  r  r  r  r  r  r  r  Zcontainer_file_endpointsnamefuncr   r   r   _initialize_container_endpoints6  s:   (
z&Router._initialize_container_endpointsc                 C   sL   | j tjdd| _| j tjdd| _| j tjdd| _| j tjdd| _dS )z*Initialize Anthropic Skills API endpoints.acreate_skillr3  alist_skills
aget_skilladelete_skillN)r0  r   r  r  r  r  r1  r   r   r   _initialize_skills_endpointsc  s   z#Router._initialize_skills_endpointsc           	      C   s   ddl m} ddl m} ddl m} ddl m} ddl m} ddl m} ddl m} dd	l m} | j	|d
d| _
| j	|dd| _| j	|dd| _| j	|dd| _| j	|dd| _| j	|dd| _| j	|dd| _| j	|dd| _dS )z-Initialize Google Interactions API endpoints.r   )acancelrS  rU  )aget)cancelr[  r\  r   acreate_interactionr3  create_interactionaget_interactionget_interactionadelete_interactiondelete_interactionacancel_interactioncancel_interactionN)Zlitellm.interactionsr  rT  rV  r  r  rL  r]  r   r0  r  r  r  r  r  r  r  r  )	r  r  r  r  r  r  r  r  r  r   r   r   "_initialize_interactions_endpointsr  s@   z)Router._initialize_interactions_endpointsc                 C   sL   |    |   |   |   |   |   |   |   |   dS )zvHelper to initialize specialized router endpoints (vector store, OCR, search, video, container, skills, interactions).N)	rR  rl  rp  rt  )_override_vector_store_methods_for_routerr  r  r  r  r1  r   r   r   !_initialize_specialized_endpoints  s   z(Router._initialize_specialized_endpointsc                 C   s   |    |   d S r/  )rJ  r  r1  r   r   r   r    s   z"Router.initialize_router_endpointsr   c                 C   sZ   |du rdS |D ]"}t |tstd| dt|dkr*td| dt| dqdS )z3
        Validate the fallbacks parameter.
        NzItem 'z' is not a dictionary.rx   zDictionary 'z%' must have exactly one key, but has z keys.)r   r  r*  len)r  r   Zfallback_dictr   r   r   r     s   
zRouter.validate_fallbacksc                    s  |d u rd S d|v }d|v }d|v }|s|s|rc| j d u r g | _ d }| j D ]}t|tr0|} nq%|d urL|jp9||_|jp?||_|jpE||_| j|_nt| j| j|||d}| j 	| t
j| d|v rddlm  | j d u rug | _ t fdd	| j D }|s  }	| j 	|	 t
j|	 |D ]F}
d }|
d
v rq|
dkrt| jd}n|
dkrt| j| j| jd}n
|
dkrt| jd}|d u rq| j d u rg | _ | j 	| t
j| qd S )Ndeployment_affinityresponses_api_deployment_checksession_affinity)r   ttl_secondsenable_user_key_affinityenable_responses_api_affinityenable_session_id_affinityencrypted_content_affinityr   EncryptedContentAffinityCheckc                 3   s    | ]}t | V  qd S r/  )r   )r%  cbr  r   r   	<genexpr>  s
    
z6Router.add_optional_pre_call_checks.<locals>.<genexpr>)r  r  r  r  Zprompt_caching)r   r   )
dual_cacher   r   Zenforce_model_rate_limits)r  )r   r   rE   r  r  r  r   r  r   r   r   r   r+  ZElitellm.router_utils.pre_call_checks.encrypted_content_affinity_checkr  anyrG   r&   r   r   rF   )r  r   r  r  r  Zexisting_affinity_callbackr  Zaffinity_callbackZalready_registeredZec_callbackpre_call_check	_callbackr   r  r   r    s   




z#Router.add_optional_pre_call_checks
deploymentc              
   C   s   z/t |}|d }tjrtddd}|||d< |W S d|v r-|d dd d |d< |W S  tyG } ztdt	|  |d}~ww )	z
        returns a copy of the deployment with the api key masked

        Only returns 2 characters of the api key and masks the rest with * (10 *).
        r      r   )Zvisible_prefixZvisible_suffixapi_keyNz
**********z+Error occurred while printing deployment - )
copydeepcopyr   Zredact_user_api_key_infor$   Z	mask_dict	Exceptionr   debugr   )r  r  Z_deployment_copyr   Zmaskerer   r   r   print_deployment  s"   
zRouter.print_deploymentr   messagesc              
   K   sn   z(t d| d ||d< ||d< | j|d< | j||d | jdi |}|W S  ty6 } z|d}~ww )	z
        Example usage:
        response = router.completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey, how's it going?"}]
        zrouter.completion(model=z,..)r   r  original_functionr   kwargsNr   )r   r  _completion_update_kwargs_before_fallbacksfunction_with_fallbacksr  )r  r   r  r  responser  r   r   r   
completion8  s   
zRouter.completionc              
   K   s  d }d }z|  }||d< | j|||dd |d}|d   }|dd }|d ur:tj| j||f|dd}	|	  | j||d |dd  |d }| j||d}
|	d	d }|d urh|
d urh||
j
krhd }n|
}| |su| j|d
 i ||| j|d|}tjdi |}td| d t|tr| j|||d}|rtjd|ddt|tr| j|||dW S |W S  ty } ztd| dt| d |d ur| || |d }~ww )Nr   specific_deploymentr   r  r  request_kwargsr   silent_modelT)targetr  r  daemonr  r  r  r  r  cachingclientzlitellm.completion(model=)[32m 200 OK[0mr   r  r  Response output was blocked. messager   llm_providermodel_responser  initial_kwargs)[31m Exception [0mr   )r  get_available_deploymentpop	threadingThread_silent_experiment_completionstart_update_kwargs_with_deployment_get_clientr   r  has_model_id routing_strategy_pre_call_checksr   r   r  r   r	  r   rp   "_should_raise_content_policy_errorContentPolicyViolationErrorrn   _completion_streaming_iteratorr  r   (_set_deployment_num_retries_on_exception)r  r   r  r  
model_namer  #input_kwargs_for_streaming_fallbackr   r  threadpotential_model_clientdynamic_api_keymodel_clientinput_kwargsr  _should_raiser  r   r   r   r  K  s   





zRouter._completionc                 K   sd   ddl m} ||}d|vri |d< d|d d< |dd |dd |d	d |d
d |S )ze
        Prepare kwargs for a silent experiment by ensuring isolation from the primary call.
        r   safe_deep_copyr   Tis_silent_experimentZlitellm_call_idNlitellm_logging_objstandard_logging_objectZproxy_server_request)'litellm.litellm_core_utils.core_helpersr  r  )r  r  r  silent_kwargsr   r   r   _get_silent_experiment_kwargs  s   z$Router._get_silent_experiment_kwargsr  c              
   K   s   z9| di  ddrW dS t|}td|  | jd	i |}| jd	|ttt	t
t
f  |d| W dS  tyY } ztd| dt
|  W Y d}~dS d}~ww )
zE
        Run a silent experiment in the background (thread).
        r   r  FN%Starting silent experiment for model r   r  #Silent experiment failed for model : r   )r   r  r  r   r	  r  r  r   r   r	   r   r  errorr  r  r  r  r  r  r   r   r   r    s(   
z$Router._silent_experiment_completionstreamc                       d S r/  r   r  r   r  r   r  r   r   r   acompletion     zRouter.acompletionc                    r  r/  r   r  r   r   r   r    r  c                    r  r/  r   r  r   r   r   r    r  c                    s"  zt||d< ||d< ||d< | j |d< | j||d |dp!| j}t }| |}|r9| j|||dI d H W S |d urNt|trN| j	di |I d H }n| j
di |I d H }t }	|	| }
t| jjtj|
d||	t|d	 |W S  ty } ztt| |t |d
 |d }~ww )Nr   r  r   r  r  priority)r   r  r  r  servicedurationr4  
start_timeend_timeparent_otel_spanlitellm_router_instancer  Zerror_traceback_stroriginal_exceptionr   )_acompletionr  r   r   time_is_prompt_management_model_prompt_management_factoryr   r   schedule_acompletionasync_function_with_fallbacksasynciocreate_taskr   async_service_success_hookrf   ROUTERr   r  rD   	traceback
format_exc)r  r   r  r   r  Zrequest_priorityr	  r  r  r
  	_durationr  r   r   r   r    sX   

fallback_itemcomplete_response_object_usagec                 C   sv   ddl m} ttt t| dd}|dur|gng }|dur-t|dr-|jdur-|| |j	|d}t
| d| dS )zCMerge partial-stream usage with fallback-stream usage on the chunk.r   )BaseTokenUsageProcessorusageN)usage_objects)Zlitellm.cost_calculatorr  r   r   rm   getattrhasattrr  r   Zcombine_usage_objectsr  )r  r  r  r  r   Zcombined_usager   r   r   _combine_fallback_usage0  s   

zRouter._combine_fallback_usager  r  c                    s@   ddl m  G fdddt} fdd}|| S )z
        Helper to iterate over a streaming response.

        Catches errors for fallbacks using the router's fallback system
        r   MidStreamFallbackErrorc                       4   e Zd Zdef fddZdd Zdd Z  ZS )zERouter._acompletion_streaming_iterator.<locals>.FallbackStreamWrapperasync_generatorc                    >   t  j|jjjd || _tdrj | _d S d S N)Zcompletion_streamr   custom_llm_providerlogging_obj_hidden_params)	superr  r   r*  r+  _async_generatorr"  r,  r  )r  r'  	__class__r  r   r   r  S  s   
zNRouter._acompletion_streaming_iterator.<locals>.FallbackStreamWrapper.__init__c                 S      | S r/  r   r1  r   r   r   	__aiter__`     zORouter._acompletion_streaming_iterator.<locals>.FallbackStreamWrapper.__aiter__c                    s   | j  I d H S r/  )r.  	__anext__r1  r   r   r   r4  c  s   zORouter._acompletion_streaming_iterator.<locals>.FallbackStreamWrapper.__anext__)r   r   r   r   r  r2  r4  __classcell__r   r  r0  r   FallbackStreamWrapperR  s    r8  c                    s  d } zXz2 z	3 d H W }|V  q6 W n  y } zddl m} |jd}ttt t|dd }z~ttd}dj	}dj
}dj}	jd	< |js\|jsad
< ndddd|jddg d
< j|d j|d|||	|ddI d H } t| dr| 2 z3 d H W }
|
rt|
trt|
dr|
| |
V  q6 nd V  W n ty } z
td|  |d }~ww W Y d }~nd }~ww W tjddx tdrz	 I d H  W n ty } ztd| W Y d }~nd }~ww | d ur2t| dr:z	|  I d H  W n) ty1 } ztd| W Y d }~nd }~ww W d    d S W d    d S W d    d S W d    d S 1 sVw   Y  d S tjddt tdrz	 I d H  W n ty } ztd| W Y d }~nd }~ww | d urt| drz	|  I d H  W n' ty } ztd| W Y d }~nd }~ww W d    w W d    w W d    w W d    w 1 sw   Y  w )Nr   stream_chunk_builderchunksr  r   r   r   r   r  r  system  You are a helpful assistant. You are given a message and you need to respond to it. You are also given a generated content. You need to respond to the message in continuation of the generated content. Do not repeat the same content. Your response should be in continuation of this text: rolecontent	assistantTr@  rA  prefixr  Fr   )r  disable_fallbacksr   r   r   model_groupr  r  r2  Fallback also failed: )shieldaclose7stream_with_fallbacks: error closing model_response: %s:stream_with_fallbacks: error closing fallback_response: %s)litellm.mainr:  r<  r   r   rm   r!  r   r   r   r   r   r  is_pre_first_chunkgenerated_contentr  *async_function_with_fallbacks_common_utilsr"  r   rk   r#  r  r   r  anyioZCancelScoperI  BaseExceptionr  )fallback_responseitemr  r:  complete_response_objectr  rF  r   r   r   r  fallback_errorr%  r  r  r  r  r   r   stream_with_fallbacksf  s  






R	2	 zERouter._acompletion_streaming_iterator.<locals>.stream_with_fallbacksZlitellm.exceptionsr%  rn   )r  r  r  r  r8  rW  r   rV  r   _acompletion_streaming_iteratorE  s
   
kz&Router._acompletion_streaming_iteratorc                    sB   ddl m  G fdddt}|  fdd}|| S )a  
        Sync equivalent of _acompletion_streaming_iterator.

        Wraps a sync streaming response so that MidStreamFallbackError
        (raised by CustomStreamWrapper.__next__) triggers the Router's
        fallback chain instead of surfacing directly to the caller.
        r   r$  c                       r&  )zHRouter._completion_streaming_iterator.<locals>.SyncFallbackStreamWrappersync_generatorc                    r(  r)  )	r-  r  r   r*  r+  _sync_generatorr"  r,  r  )r  rZ  r/  r   r   r    s   
zQRouter._completion_streaming_iterator.<locals>.SyncFallbackStreamWrapper.__init__c                 S   r1  r/  r   r1  r   r   r   __iter__  r3  zQRouter._completion_streaming_iterator.<locals>.SyncFallbackStreamWrapper.__iter__c                 S   s
   t | jS r/  )nextr[  r1  r   r   r   __next__  s   
zQRouter._completion_streaming_iterator.<locals>.SyncFallbackStreamWrapper.__next__)r   r   r   r
   r  r\  r^  r5  r   r6  r7  r   SyncFallbackStreamWrapper  s    r_  c                  3   s   d } zz
D ]}|V  qW n  y } zddl m} |jd}ttt t|dd }zwttd}dj	}dj
}dj}	jd	< |jsW|js\d
< ndddd|jddg d
< j|d jdi |||	d} t| dr| D ]}
|
rt|
trt|
dr|
| |
V  qnd V  W n ty } z
td|  |d }~ww W Y d }~nd }~ww W tdrz  W n ty } ztd| W Y d }~nd }~ww | d urt| drz|   W d S  ty } ztd| W Y d }~d S d }~ww d S d S tdrCz  W n tyB } ztd| W Y d }~nd }~ww | d urnt| droz|   W w  tym } ztd| W Y d }~w d }~ww w w )Nr   r9  r;  r  r   r   r   r   r  r  r=  r>  r?  rB  TrC  r  )r   r   r   r\  rG  closerJ  rK  r   )rL  r:  r<  r   r   rm   r!  r   r   r   r   r   r  rM  rN  r  r  r"  r   rk   r#  r  r   r  r`  rQ  r  )rR  rS  r  r:  rT  r  rF  r   r   r   r  rU  Z	close_errr%  r  r  r  Zrouter_selfr   r   rW    s   




		
DzDRouter._completion_streaming_iterator.<locals>.stream_with_fallbacksrX  )r  r  r  r  r_  rW  r   ra  r   r    s
   
\z%Router._completion_streaming_iteratorc              
      s   z8| di  ddrW dS t|}td|  | jd	i |}| jd	|ttt	 |d|I dH  W dS  t
yY } ztd| dt|  W Y d}~dS d}~ww )
z<
        Run a silent experiment in the background.
        r   r  FNr  r  r  r  r   )r   r  r  r   r	  r  r  r   r   rL   r  r  r   r  r   r   r   _silent_experiment_acompletionT  s*   
z%Router._silent_experiment_acompletionc              
      sj  d}d}i }z;|  }||d< t|}t }	| j|||dd|dI dH }|}t }
|
|	 }t| jjt	j
|d|	|
t|d | j||d |d   }|d	d}|durjt| jd ||d
| | j||d |d	d |d }| j||d}| j|  d7  < i ||| j|d|}|d	d tjd i |}|dd}| j||dd}|durt|tjr|4 I dH  	 | j|||dI dH  |I dH }W d  I dH  n1 I dH sw   Y  n| j|||dI dH  |I dH }t|tr| j|||d}|rtjd|dd| j|  d7  < td| d | j|||d t|trA| j|||dI dH W S |W S  tj y } z.|di dd}|di dd}| j!d| d| 7  _!|dury| "|| |d}~w t#y } z)td| dt$| d |dur| j%|  d7  < |dur| "|| |d}~ww )!z
        - Get an available deployment
        - call it with a semaphore over the call
        - semaphore specific to it's rpm
        - in the semaphore,  make a check against it's local rpm before running
        Nr   r  r  async_get_available_deploymentr  r  r  r   r  )r  r  r  rx   r  r  max_parallel_requestsr  r  client_type)r  r+  r  r  r  r  r  zlitellm.acompletion(model=r  )r  r  r  r  r   r   z$

Deployment Info: request_timeout: z

timeout: r  r  r   )&r  r   r  rc  r  r  r  r   r  rf   r  _track_deployment_metricsrb  r  _get_async_openai_model_clientr   r   r   r  r   r  r   	Semaphore&async_routing_strategy_pre_call_checksrp   r  r  r   r   r	  rn   rY  Timeoutr  r  r  r   r   )r  r   r  r  r  r  Z_timeout_debug_deployment_dictr  r  r	  r
  r  r   r  r  r  	_responser+  rpm_semaphorer  r  r  Z deployment_request_timeout_paramZdeployment_timeout_paramr   r   r   r  r  s  	

(




zRouter._acompletionr   r  metadata_variable_namec                 C   sV   | d| j|d< |dtt  d}| j|dr|}||i ||d dS )zm
        Adds/updates to kwargs:
        - num_retries
        - litellm_trace_id
        - metadata
        r   Zlitellm_trace_idNr   )rF  r   )r   r   r   r   r   uuid4_get_model_from_aliasr   )r  r   r  ro  r   r   r   r   r    s   z&Router._update_kwargs_before_fallbacks	exceptionc              	   C   sd   t |dr|jdurdS |di }|d}|dur0zt||_W dS  ttfy/   Y dS w dS )z
        Set num_retries from deployment litellm_params on the exception.

        This allows the retry logic in async_function_with_retries to use
        per-deployment retry settings instead of the global setting.
        r   Nr   )r"  r   r   r   r*  	TypeError)r  rs  r  r   Zdep_num_retriesr   r   r   r  2  s   

z/Router._set_deployment_num_retries_on_exceptionc                 C   sX   | j  }|di pi }| D ]\}}|du rq||| q||i | dS )z
        Adds default litellm params to kwargs, if set.

        Handles inserting this as either "metadata" or "litellm_metadata" depending on the metadata_variable_name
        r   N)r   r  r  r  r   r   )r  r  ro  defaultsZmetadata_defaultskeyr$  r   r   r   *_update_kwargs_with_default_litellm_paramsG  s   
	z1Router._update_kwargs_with_default_litellm_paramsfunction_namec                 C   s   | di  }|d  }t||d}t|d}| |i  d}| j||d}	| d}
|	|d< |
|d< t|tdi ||d	}| j|d
 |S )z.
        Handle clientside credential
        
model_infor   )r   r  rx  rF  )rF  r   idoriginal_model_idr  r   ry  r  Nr   )r   r  r5   r1   _generate_model_idrW   rZ   upsert_deployment)r  r  r  rx  ry  r   Zdynamic_litellm_paramsro  rF  	_model_idr|  deployment_pydantic_objr   r   r   _handle_clientside_credential\  s0   
z$Router._handle_clientside_credentialc                 C   s   |  di pi }t|tr|}n|jdd}| dpg }| dp#g }|s(|r4t|t| }||d< d|vrG| ddurI|d |d< dS dS dS )z
        Merge tools from deployment litellm_params with request kwargs.
        When both have tools, concatenate them (deployment tools first, then request tools).
        tool_choice: use request value if provided, else deployment's.
        r   Tr   ZtoolsZtool_choiceN)r   r   r  r  r   )r  r  Zdep_params_rawZ
dep_paramsZ	dep_toolsZ	req_toolsmergedr   r   r   _merge_tools_from_deployment|  s   
z#Router._merge_tools_from_deploymentc                 C   sr  | j ||d |di  }|d d }|d d}|d }t|dr:| j|||d}|j }|jj}|jj	}t
|d	}	||	i ||||d
 |di d}
|
ry||	 dp`g }t|}|
D ]}||vrr|| qg|||	 d< |di d}|rd| }||	 dpg }||vr|| |||	 d< ||d< | j||d d|d< | j||	d dS )a  
        3 jobs:
        - Adds selected deployment, model_info and api_base to kwargs["metadata"] (used for logging)
        - Adds default litellm params to kwargs, if set.
        - Merges tools from deployment with request (proxy-configured tools + request tools).
        r  ry  r   r   api_baser  )r  r  r  rx  rz  )r  ry  r  deployment_model_nametagslitellm_credential_namezCredential: r  datar   )r  ro  N)r  r   r  r6   r  ry  r  r   r   r  r1   r   r   r   r   _get_timeoutrw  )r  r  r  rx  ry  Zdeployment_litellm_model_nameZdeployment_api_baser  r  ro  Zdeployment_tagsZexisting_tagsZmerged_tagstagZcredential_nameZcredential_tagr   r   r   r    s^   







z%Router._update_kwargs_with_deploymentc                 C   sF   | j ||dd}|dd}|dur|dur||jkrd}|S |}|S )a  
        Helper to get AsyncOpenAI or AsyncAzureOpenAI client that was created for the deployment

        The same OpenAI client is re-used to optimize latency / performance in production

        If dynamic api key is provided:
            Do not re-use the client. Pass model_client=None. The OpenAI/ AzureOpenAI client will be recreated in the handler for the llm provider
        asyncrf  r  N)r  r   r  )r  r  r  r  r  r  r   r   r   ri    s   	
z%Router._get_async_openai_model_clientr  c                 C   s,   | ddp| ddp| jp| j ddS )z=Helper to get stream timeout from kwargs or deployment paramsr   N)r   r   r   )r  r  r  r   r   r   _get_stream_timeout  s   zRouter._get_stream_timeoutc                 C   sH   | ddp!| ddp!| ddp!| ddp!| jp!| j dd}|S )zAHelper to get non-stream timeout from kwargs or deployment paramsr   Nr   )r   r   r   r  r  r  r   r   r   r   _get_non_stream_timeout  s    
	zRouter._get_non_stream_timeoutc                 C   s8   d}| ddr| j||d}|du r| j||d}|S )z6Helper to get timeout from kwargs or deployment paramsNr   Fr  )r   r  r  r  r   r   r   r  	  s   zRouter._get_timeoutmodelsc              
      sV  dt dtt f fdd}dt dtt dtf fdd}t|trItdd	 |D rIg }|D ]}||d||d
| q/tj	| I dH }|S t|trtdd	 |D rg }t
|D ]\}	}
|D ]}||d||	|
d| qcq]tj	| I dH }dd tt|D }|D ]}t|tr||d  |d  q|d | q|S dS dS )a6  
        Async Batch Completion. Used for 2 scenarios:
        1. Batch Process 1 request to N models on litellm.Router. Pass messages as List[Dict[str, str]] to use this
        2. Batch Process N requests to M models on litellm.Router. Pass messages as List[List[Dict[str, str]]] to use this

        Example Request for 1 request to N models:
        ```
            response = await router.abatch_completion(
                models=["gpt-3.5-turbo", "groq-llama"],
                messages=[
                    {"role": "user", "content": "is litellm becoming a better product ?"}
                ],
                max_tokens=15,
            )
        ```


        Example Request for N requests to M models:
        ```
            response = await router.abatch_completion(
                models=["gpt-3.5-turbo", "groq-llama"],
                messages=[
                    [{"role": "user", "content": "is litellm becoming a better product ?"}],
                    [{"role": "user", "content": "who is this"}],
                ],
            )
        ```
        r   r  c              
      J   z j d| |d|I dH W S  ty$ } z|W  Y d}~S d}~ww zs
            Wrapper around self.async_completion that catches exceptions and returns them as a result
            r  Nr   r  r  r   r  r  r  r1  r   r   _async_completion_no_exceptions>	     zARouter.abatch_completion.<locals>._async_completion_no_exceptionsidxc              
      sR   z j d| |d|I dH |fW S  ty( } z
||fW  Y d}~S d}~ww r  r  )r   r  r  r  r  r1  r   r   *_async_completion_no_exceptions_return_idxI	  s   	zLRouter.abatch_completion.<locals>._async_completion_no_exceptions_return_idxc                 s       | ]}t |tV  qd S r/  )r   r  r%  r  r   r   r   r  \	      z+Router.abatch_completion.<locals>.<genexpr>r  Nc                 s   r  r/  )r   r   r  r   r   r   r  c	  r  )r   r  r  c                 S   s   g | ]}g qS r   r   )r%  _r   r   r   r'  n	  s    z,Router.abatch_completion.<locals>.<listcomp>rx   r   r   )r   r   rL   r   r   r   allr   r  gather	enumerateranger  tuple)r  r  r  r  r  r  _tasksr   r  r  r  r;  Zfinal_responsesr   r1  r   abatch_completion	  sN   $
zRouter.abatch_completionc                    sX   dt dtt f fdd}g }|D ]}||d||d| qtj| I dH }|S )a  
        Async Batch Completion - Batch Process multiple Messages to one model_group on litellm.Router

        Use this for sending multiple requests to 1 model

        Args:
            model (List[str]): model group
            messages (List[List[Dict[str, str]]]): list of messages. Each element in the list is one request
            **kwargs: additional kwargs
        Usage:
            response = await self.abatch_completion_one_model_multiple_requests(
                model="gpt-3.5-turbo",
                messages=[
                    [{"role": "user", "content": "hello"}, {"role": "user", "content": "tell me something funny"}],
                    [{"role": "user", "content": "hello good mornign"}],
                ]
            )
        r   r  c              
      r  r  r  r  r1  r   r   r  	  r  z]Router.abatch_completion_one_model_multiple_requests.<locals>._async_completion_no_exceptionsr  Nr   )r   r   rL   r   r  r  )r  r   r  r  r  r  Zmessage_requestr  r   r1  r   -abatch_completion_one_model_multiple_requestsv	  s"   z4Router.abatch_completion_one_model_multiple_requestsc                    r  r/  r   r  r   r   r   "abatch_completion_fastest_response	  r  z)Router.abatch_completion_fastest_responsec                    r  r/  r   r  r   r   r   r  	  r  c                    s   dd | dD }dtdttttf  dtdtdtttt	f f
fd	d
}g  dt
jf fdd}|D ]}t
|d|||d|} | q5 rut
j t
jdI dH \}	 |	D ]}
||
I dH }|durrd|jd< |  S q\ sMt	d)z
        model - List of comma-separated model names. E.g. model="gpt-4, gpt-3.5-turbo"

        Returns fastest response from list of model names. OpenAI-compatible endpoint.
        c                 S   s   g | ]}|  qS r   )stripr  r   r   r   r'  	      z=Router.abatch_completion_fastest_response.<locals>.<listcomp>,r   r  r   r  r   c              
      sp   z j d| ||d|I dH }|W S  tjy$   td|    ty7 } z|W  Y d}~S d}~ww )zn
            Wrapper around self.acompletion that catches exceptions and returns them as a result
            r   r  r   Nz4Received 'task.cancel'. Cancelling call w/ model={}.r   )r  r  CancelledErrorr   r  r
  r  )r   r  r   r  resultr  r1  r   r   r  	  s   zRRouter.abatch_completion_fastest_response.<locals>._async_completion_no_exceptionstaskc                    s   zNz0| I d H }t |ttfr1td  D ]}|  q|W W z |  W S  ty0   Y S w W n	 ty;   Y nw W z |  W d S  tyO   Y d S w z |  W w  ty`   Y w w )Nz=Received successful response. Cancelling other LLM API calls.)	r   rp   rn   r   r  r  r  KeyErrorr  )r  r  t)pending_tasksr   r   check_response	  s<   

zARouter.abatch_completion_fastest_response.<locals>.check_responser  )return_whenNTZ!fastest_response_batch_completionzAll tasks failedr   )splitr   r   r	   boolr   r   rp   rn   r  r  Taskr  r   waitFIRST_COMPLETEDr,  )r  r   r  r   r  r  r  r  r  doneZcompleted_taskr  r   )r  r  r   r  	  sH   
r  c                    r  r/  r   r  r   r  r  r   r  r   r   r   r  
  r  zRouter.schedule_acompletionc                    r  r/  r   r  r   r   r   r  
  r  c              
      sZ  t |}tt }t|||d}| jj|dI d H  t | j	 }	t }
| jj
}d}|
|	k r^| j||dI d H \}}| jj|j|j|dI d H }|rNnt|I d H  t }
|
|	k s1|rz#| jd|||d|I d H }|jdi  |jd dd	i |W S  ty } zt|d
| |d }~ww | jj|j|jdI d H  tjd|dd)Nr  
request_idr  requestFr   r  r{  r  Zhealth_deploymentsr  additional_headers%x-litellm-request-prioritization-usedTr  r  r  %Request timed out while polling queuer   r  r   )r   r   r   rq  rJ   r   add_requestr  	monotonicr   r   _async_get_healthy_deploymentspollr  r  r  sleepr  r,  r   r   r  r  remove_requestr   rl  )r  r   r  r  r   r  r  _request_idrS  r
  	curr_timepoll_intervalmake_request_healthy_deploymentsr  rm  r  r   r   r   r  
  sh   
r  r  .c              
      s\  t |}tt }t|||d}| jj|dI d H  t | j	 }	t }
| jj
}d}|
|	k r^| j||dI d H \}}| jj|j|j|dI d H }|rNnt|I d H  t }
|
|	k s1|rz$||i |I d H }t|jtr|jdi  |jd ddi |W S  ty } zt|d	| |d }~ww | jj|j|jd
I d H  tjd|dd)Nr  r  Fr  r  r  r  Tr  r  r  r   r  )r   r   r   rq  rJ   r   r  r  r  r   r   r  r  r  r  r  r  r   r,  r  r   r   r  r  r  r   rl  )r  r   r  r  r  r  r  r  rS  r
  r  r  r  r  r  rm  r  r   r   r   _schedule_factoryX
  sb   
zRouter._schedule_factoryc                 C   sd   | j |d}|d u st|dkrdS |d d dd }|d u s$d|vr&dS |dd }|tjv S )Nr  rx   Fr   r   r   /)get_model_listr  r   r  r   )_known_custom_logger_compatible_callbacks)r  r   r   litellm_modelsplit_litellm_modelr   r   r   r  
  s   
z"Router._is_prompt_management_modelc                    s  | dd }|d u rtdi dt t d|\}}tt|}| j|dddg|dd d}| j||d	 |d
 	 }| dd }t
|toM|d}| dpZ|d
  dd }	| dpg|d
  dd }
| dd pu|d
  dd }|s|	d u st
|	tstd|	 dt|	 |
d urt
|
tstd|
 dt|
 |j||t|d|	|
|d\}}}h d  fdd| D }i |||}||d< ||d< ||d< |	|d< |
|d< ||d< | j|d}|d u st|dkr|d tjdi |I d H S | jdi |I d H S )Nr  r  )r  Z	rules_objr	  userpromptr?  r  r   r  r  r  r   r   zlitellm_agent/	prompt_idprompt_variablesprompt_labelz*Prompt ID is not set or not a string. Got=z, type=z2Prompt variables is set but not a dictionary. Got=r  )r   r  non_default_paramsr  r  r  >   r  Zbitbucket_configr  r  Zdotprompt_configZprompt_versionc                       i | ]\}}| vr||qS r   r   r%  kvZprompt_management_paramsr   r   
<dictcomp>
      z5Router._prompt_management_factory.<locals>.<dictcomp>r  r  r   r  r   )r   rr   rq   rv   r   LiteLLMLoggingr  r  r  r  r   r   
startswithr*  r   r  Zget_chat_completion_promptrt   r  r  r  r   r  r  )r  r   r  r  Zlitellm_logging_objectZprompt_management_deploymentr  r  Zis_litellm_agent_modelr  r  r  Zoptional_paramsZfiltered_dataZ_model_listr   r  r   r  
  s   






z!Router._prompt_management_factoryr  c              
   K   sv   z,||d< ||d< | j |d< |d| j|d< |di d|i | jdi |}|W S  ty: } z|d }~ww )Nr   r  r  r   r   rF  r   )_image_generationr   r   r   r   r  r  r  r  r   r  r  r  r   r   r   image_generation	  s   
zRouter.image_generationc           
   
   K   s$  d}zft d| d|  | j|dddg|dd d}| j||d	 |d
  }| j||d	}| j|  d7  < | j|d t	j
di i ||| j|d|}| j|  d7  < t d| d |W S  ty }	 zt d| dt|	 d |d ur| j|  d7  < |	d }	~	ww )Nr  #Inside _image_generation()- model: 
; kwargs: r  r  r?  r  r  r  r   rx   r  r  r  r  zlitellm.image_generation(model=r  r  r  r   )r   r  r  r  r  r  ri  r   r  r   r  r   r   r	  r  r   r   )
r  r  r   r  r  r  r  r  r  r  r   r   r   r    sV   

	
zRouter._image_generationc                    s   z+||d< ||d< | j |d< |d| j|d< | j||d | jdi |I d H }|W S  tyG } ztt| |t	
 |d |d }~ww )Nr   r  r  r   r  r  r   )_aimage_generationr   r   r  r  r  r  r  rD   r  r  r  r   r   r   aimage_generationD  s*   
zRouter.aimage_generationc              
      s  |}zt d| d|  t|}| j|dddg|dd |dI d H }| j||d |d	  }|d
 }| j||d}| j|  d7  < t	j
di i ||| j|d|}	| j||dd}
|
d urt|
tjr|
4 I d H  	 | j||dI d H  |	I d H }	W d   I d H  n1 I d H sw   Y  n| j||dI d H  |	I d H }	| j|  d7  < t d| d |	W S  ty } zt d| dt| d |d ur| j|  d7  < |d }~ww )Nr  r  r  r  r?  r  r  r  r   r   rx   r  re  rf  rd  z litellm.aimage_generation(model=r  r  r  r   )r   r  r   rc  r  r  r  ri  r   r   r  r   r  r   r  rj  rk  r   r	  r  r   r   )r  r  r   r  r  r  r  r  r  r  rn  r  r   r   r   r  Y  s   


(


zRouter._aimage_generationfilec                    s~   z"||d< ||d< | j |d< | j||d | jdi |I dH }|W S  ty> } ztt| |t |d |d}~ww )a  
        Example Usage:

        ```
        from litellm import Router
        client = Router(model_list = [
            {
                "model_name": "whisper",
                "litellm_params": {
                    "model": "whisper-1",
                },
            },
        ])

        audio_file = open("speech.mp3", "rb")
        transcript = await client.atranscription(
        model="whisper",
        file=audio_file
        )

        ```
        r   r  r  r  Nr  r   )	_atranscriptionr  r  r  r  r  rD   r  r  )r  r  r   r  r  r  r   r   r   atranscription  s(   
zRouter.atranscriptionc              
      s  |}zt d| d|  t|}| j|dddg|dd |dI d H }| j||d |d	  }| j||d}| j|  d
7  < t	j
di i ||| j|d|}	| j||dd}
|
d urt|
tjr|
4 I d H  	 | j||dI d H  |	I d H }	W d   I d H  n1 I d H sw   Y  n| j||dI d H  |	I d H }	| j|  d
7  < t d| d |	W S  ty } zt d| dt| d |d ur| j|  d
7  < |d }~ww )Nz!Inside _atranscription()- model: r  r  r  r?  r  r  r  r   rx   )r  r  r  re  rf  rd  zlitellm.atranscription(model=r  r  r  r   )r   r  r   rc  r  r  r  ri  r   r   r  r   r  r   r  rj  rk  r   r	  r  r   r   )r  r  r   r  r  r  r  r  r  r  rn  r  r   r   r   r    s   


(


zRouter._atranscriptioninputvoicec                    s<  z||d< ||d< | j |dddg|dd|dI dH }| j||d	 |d
  }|d  | j D ]\}}||vrA|||< q4|dkrL|| | q4| j||dd}	|dd}
|
durk|	durk|
|	j	krkd}n|	}t
jdi i |d|i|I dH }|W S  ty } ztt| |t |d |d}~ww )a  
        Example Usage:

        ```
        from litellm import Router
        client = Router(model_list = [
            {
                "model_name": "tts",
                "litellm_params": {
                    "model": "tts-1",
                },
            },
        ])

        async with client.aspeech(
            model="tts",
            voice="alloy",
            input="the quick brown fox jumped over the lazy dogs",
            api_base=None,
            api_key=None,
            organization=None,
            project=None,
            max_retries=1,
            timeout=600,
            client=None,
            optional_params={},
        ) as response:
            response.stream_to_file(speech_file_path)

        ```
        r  r  r  r  r?  r  Nr  r  r   r   r   r  rf  r  r  r  r   )rc  r  r  r  r   r  r   r  r   r  r   aspeechr  r  r  rD   r  r  )r  r   r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r    sd    



zRouter.aspeechc                    s~   z"||d< t |d< | j|d< | j||d | jdi |I d H }|W S  ty> } ztt| |t	 |d |d }~ww Nr   r  r  r  r  r   )
r  _arerankr  r  r  r  r  rD   r  r  r  r   r  r  r  r   r   r   arerankg  s(   
zRouter.arerankc           	   
      s$  d }zet d| d|  | j||dd |dI d H }| j||d |d  }|d }| j||d}| j|  d7  < tj	di i || j
|d	|I d H }| j|  d7  < t d
| d |W S  ty } zt d
| dt| d |d ur| j|  d7  < |d }~ww )NzInside _rerank()- model: r  r  )r   r  r  r  r   r   rx   )r  r  zlitellm.arerank(model=r  r  r  r   )r   r  rc  r  r  r  ri  r   r   r  r   r   r	  r  r   r   )	r  r   r  r  r  r  r  r  r  r   r   r   r  |  sV   
	
zRouter._arerankis_retryis_fallbackis_asyncc              
   K   s   d|dg}z_||d< ||d< | d| j|d< |di d|i | j|||dd d	}|d
  }	| j D ]\}
}|
|vrG|||
< q:|
dkrR||
 | q:t	j
di i |	|| jd|W S  tys } z|d }~ww )Nr  r?  r   r  r   r   rF  r  r  r   )r  r  r   )r   r   r   r   r  r  r  r   r  r   text_completionr   r  )r  r   r  r  r  r  r  r  r  r  r  r  r  r   r   r   r    s.   	

&zRouter.text_completionc           	         s   | dd d ur| j||d| j||f|dI d H S z"||d< ||d< | j|d< | j||d | jdi |I d H }|W S  tyY } zt	t
| |t |d |d }~ww )	Nr  )r   r  r  r  r  r   r  r  r  r  r   )r   r  r  atext_completion_atext_completionr  r  r  r  r  rD   r  r  )	r  r   r  r  r  r  r  r  r  r   r   r   r    s8   	
zRouter.atext_completionc              
      s  zt d| d|  t|}| j|d|dg|dd |dI d H }| j||d |d  }|d	 }| j||d}| j|  d
7  < t	j
di i ||| j|d|}	| j||dd}
|
d urt|
tjr|
4 I d H  	 | j||dI d H  |	I d H }	W d   I d H  n1 I d H sw   Y  n| j||dI d H  |	I d H }	| j|  d
7  < t d| d |	W S  ty } zt d| dt| d |d ur| j|  d
7  < |d }~ww )N#Inside _atext_completion()- model: r  r  r?  r  r  r  r   r   rx   r  re  rf  rd  zlitellm.atext_completion(model=r  r  r  r   )r   r  r   rc  r  r  r  ri  r   r   r  r   r  r   r  rj  rk  r   r	  r  r   r   )r  r   r  r  r  r  r  r  r  r  rn  r  r   r   r   r       



(


zRouter._atext_completion
adapter_idc           	         s   z/||d< ||d< | j |d< |d| j|d< |di d|i | jdi |I d H }|W S  tyK } ztt	| |t
 |d |d }~ww )	Nr   r  r  r   r   rF  r  r   )_aadapter_completionr   r   r   r   r  r  r  r  rD   r  r  )	r  r  r   r  r  r  r  r  r  r   r   r   aadapter_completion7  s*   	
zRouter.aadapter_completionc              
      s  zt d| d|  t|}| j|dddg|dd |dI d H }| j||d |d	  }|d
 }| j||d}| j|  d7  < t	j
di i ||| j|d|}	| j||dd}
|
d urt|
tjr|
4 I d H  	 | j||dI d H  |	I d H }	W d   I d H  n1 I d H sw   Y  n| j||dI d H  |	I d H }	| j|  d7  < t d| d |	W S  ty } zt d| dt| d |d ur| j|  d7  < |d }~ww )Nz&Inside _aadapter_completion()- model: r  r  zdefault textr?  r  r  r  r   r   rx   )r  r  r  re  rf  rd  z"litellm.aadapter_completion(model=r  r  r  r   )r   r  r   rc  r  r  r  ri  r   r   r  r   r  r   r  rj  rk  r   r	  r  r   r   )r  r  r   r  r  r  r  r  r  r  rn  r  r   r   r   r  T  r  zRouter._aadapter_completionc                    s*   ddl m} |jd| |d|I dH S )z
        Helper function to make a search API call through the router with load balancing and fallbacks.
        Reuses the router's retry/fallback infrastructure.
        r   SearchAPIRouter)router_instancer  Nr   )&litellm.router_utils.search_api_routerr  Zasync_search_with_fallbacks)r  r  r  r  r   r   r   _asearch_with_fallbacks  s   zRouter._asearch_with_fallbacksoriginal_generic_functionc                    s,   ddl m} |jd| ||d|I dH S )z
        Helper function for search API calls - selects a search tool and calls the original function.
        Called by async_function_with_fallbacks for each retry attempt.
        r   r  )r  r   r
  Nr   )r  r  Z"async_search_with_fallbacks_helper)r  r   r
  r  r  r   r   r   _asearch_with_fallbacks_helper  s   z%Router._asearch_with_fallbacks_helperguardrail_namec                    s\   ||d< ||d< | j |d< | j||dd td| d|  | jd	i |I dH }|S )
aw  
        Execute a guardrail with load balancing and fallbacks.

        Args:
            guardrail_name: Name of the guardrail to execute
            original_function: The guardrail's execution function (e.g., async_pre_call_hook)
            **kwargs: Additional arguments passed to the guardrail

        Returns:
            Result from the guardrail execution
        r   r
  r  litellm_metadatar   r  ro  z&Inside aguardrail() - guardrail_name: r  Nr   )_aguardrail_helperr  r   r  r  )r  r  r  r  r  r   r   r   
aguardrail  s   
zRouter.aguardrailc                    sP   |}| j |d}td|di d  ||d< |di |I dH }|S )a}  
        Helper for aguardrail - selects a guardrail deployment and executes it.
        Called by async_function_with_fallbacks for each retry attempt.

        Args:
            model: The guardrail_name (named 'model' for fallback system compatibility)
            original_generic_function: The guardrail's execution function
            **kwargs: Additional arguments
        r  zSelected guardrail deployment: r   Z	guardrailselected_guardrailNr   )get_available_guardrailr   r  r   )r  r   r
  r  r  r  r  r   r   r   r    s   zRouter._aguardrail_helperrY   c                    sZ   ddl m}  fdd| jD }|std  t|dkr#|d S tt|| | dS )z
        Select a guardrail deployment using the router's load balancing strategy.

        Args:
            guardrail_name: Name of the guardrail to select

        Returns:
            Selected guardrail configuration dict
        r   r,   c                    s   g | ]}| d  kr|qS r  r  )r%  gr  r   r   r'    r  z2Router.get_available_guardrail.<locals>.<listcomp>zNo guardrail found with name: rx   llm_router_instancer   r   )&litellm.router_strategy.simple_shuffler-   r   r*  r  r   rY   )r  r  r-   r   r   r  r   r    s    
zRouter.get_available_guardrailc                    s   z.||d< ||d< | j |d< | j||dd td| d|  | jd
i |I dH }|W S  tyJ } ztt| |t	
 |d	 |d}~ww )
        Helper function to make a generic LLM API call through the router, this allows you to use retries/fallbacks with litellm router
        r   r
  r  r  r  z3Inside ageneric_api_call_with_fallbacks() - model: r  Nr  r   )(_ageneric_api_call_with_fallbacks_helperr  r   r  r  r  r  r  rD   r  r  )r  r   r  r  r  r  r   r   r   !_ageneric_api_call_with_fallbacks  s2   
z(Router._ageneric_api_call_with_fallbacksr  c                 C   sv   d|v r9|d r9ddl m} z|||d|dd\}}}}|}W n ty.   |}Y nw |d |||d< |S )a  
        Add the deployment model to the endpoint for LLM passthrough route.

        e.g for bedrock invoke users can pass endpoint as /model/special-bedrock-model/invoke
          it should be actually sent as /model/us.anthropic.claude-3-5-sonnet-20240620-v1:0/invoke
        endpointr   )rs   r*  r  )r   r*  r  )r   rs   r   r  replace)r  r  r   r  rs   Zstripped_model_namer  Zreplacement_model_namer   r   r   ;_add_deployment_model_to_endpoint_for_llm_passthrough_route5  s    	zBRouter._add_deployment_model_to_endpoint_for_llm_passthrough_routec              
      s  | dd}d}zt|}z| j|||dd| dddI dH }W n# tyG } z|rA|dd|i|I dH W  Y d}~W S |d}~ww | j|||d	 |d
  }	|	d }
| j|
  d7  < | j|||
d |di i |	d| j	i|}| j
||dd}|durt|tjr|4 I dH  	 | j||dI dH  |I dH }W d  I dH  n1 I dH sw   Y  n| j||dI dH  |I dH }| j|
  d7  < td|
 d |W S  ty } ztd| dt| d |dur| j|  d7  < |d}~ww )r  passthrough_on_no_deploymentFr  r  Nr  )r   r  r  r  r   r  r   rx   )r  r   r  r  re  rf  rd  z'ageneric_api_call_with_fallbacks(model=r  r  r  r   )r  r   rc  r   r  r  r  r   r  r   r  r   r  rj  rk  r   r   r	  r   r   )r  r   r
  r  r  rx  r  r  r  r  r  r  rn  r   r   r   r  T  s   

&
(



z/Router._ageneric_api_call_with_fallbacks_helperc                 K   s  |j }ztd| d| d|  | j||dd|ddd}| j||dd	 |d
  }|d }| j|  d7  < d|v rO|d rO|d 	|||d< ||d< | j
|d zt|d d\}}	}}W n tyq   d}	Y nw |di i ||	| jd|}
| j|  d7  < t| d| d |
W S  ty } zt| d| dt| d |dur| j|  d7  < |d}~ww )a  
        Make a generic LLM API call through the router, this allows you to use retries/fallbacks with litellm router
        Args:
            model: The model to use
            original_function: The handler function to call (e.g., litellm.completion)
            **kwargs: Additional arguments to pass to the handler function
        Returns:
            The response from the handler function
        z&Inside _generic_api_call() - handler: z	, model: r  r  Nr  r  Zgeneric_api_callr  r   r   rx   r  r  rp  )r*  r  z(model=r  r  r  r   )r   r   r  r  r   r  r  r  r   r  r  rs   r  r   r   r	  r   r   )r  r   r  r  handler_namer  r  r  r  r*  r  r  r   r   r    _generic_api_call_with_fallbacks  sb   

	z'Router._generic_api_call_with_fallbacksc              
   K   s\   z||d< ||d< | j |d< | j||d | jdi |}|W S  ty- } z|d }~ww )Nr   r  r  r  r   )
_embeddingr  r  r  r  r   r  r  r  r  r  r   r   r   	embedding  s   
zRouter.embeddingc              
   K   sV  d }zt d| d|  | j|||dd d}| j||d |d  }|d }| j||dd	}|d
d }|d urI|d urI||jkrId }	n|}	| j	|  d7  < | j
|d tjdi i ||| j|	d|}
| j|  d7  < t d| d |
W S  ty } zt d| dt| d |d ur| j|  d7  < |d }~ww )NzInside embedding()- model: r  r  )r   r  r  r  r   r   syncrf  r  rx   r  r  r  r  zlitellm.embedding(model=r  r  r  r   )r   r  r  r  r  r  r  r   r  r   r  r   r#  r   r   r	  r  r   r   )r  r  r   r  r  r  r  r  r  r  r  r  r   r   r   r!    sb   

	
zRouter._embeddingc                    s~   z"||d< ||d< | j |d< | j||d | jdi |I d H }|W S  ty> } ztt| |t |d |d }~ww r  )	_aembeddingr  r  r  r  r  rD   r  r  r"  r   r   r   
aembedding1  s(   
zRouter.aembeddingc              
      s  d }zt d| d|  t|}| j|||dd |dI d H }| j||d |d  }|d }| j||d}| j|  d7  < t	j
di i ||| j|d	|}	| j||d
d}
|
d urt|
tjr|
4 I d H  	 | j||dI d H  |	I d H }	W d   I d H  n1 I d H sw   Y  n| j||dI d H  |	I d H }	| j|  d7  < t d| d |	W S  ty } zt d| dt| d |d ur| j|  d7  < |d }~ww )NzInside _aembedding()- model: r  r  )r   r  r  r  r  r   r   rx   r%  re  rf  rd  zlitellm.aembedding(model=r  r  r  r   )r   r  r   rc  r  r  r  ri  r   r   r'  r   r  r   r  rj  rk  r   r	  r  r   r   )r  r  r   r  r  r  r  r  r  r  rn  r  r   r   r   r&  J  s   

(


zRouter._aembeddingc                    s   z'||d< | j |d< |d| j|d< | j||d | jdi |I d H }|W S  tyC } ztt| |t	
 |d |d }~ww )Nr   r  r   r  r  r   )_acreate_filer   r   r  r  r  r  r  rD   r  r  r  r   r   r   acreate_file  s(   
zRouter.acreate_filec                    sN  zyddl m} td| d   t j|dddg dd  d	I d H }d
tdtf fdd}g }t	|trH|
|| n|D ]	}|
|| qJtj| I d H }t|dkrftd|||d}	tt|d }
|	|
jd< |
W S  ty } z td| d  dt| d |d urj|  d7  < |d }~ww )Nr   )add_model_file_id_mappingsr   r  r  files-api-fake-textr?  r  r   r  r  r  r  r  r   c              	      s  ddl m} | }j| |dd | d  }|d }j| |d}j|  d7  < t|d d	\}}}}ttt	  
d
}	ttt  
d}
|
rP|	sTtdt|	d}|ret|
|d}
|
|d< d|v rs|d |di d< tjdi i ||j|d|}j| |dd}|d urt|tjr|4 I d H  	 j| dI d H  |I d H }W d   I d H  n1 I d H sw   Y  nj| dI d H  |I d H }j|  d7  < td| d |S )Nr   r  r)  r  r   r   r  rx   rp  purposer  z2file and file_purpose are required for create_file)r-  )Zfile_contentZnew_model_nameZgcs_bucket_namer  r*  r  r  re  rf  rd  litellm.acreate_file(model=r  r   )r  r  r  r  ri  r   rs   r   r   rO   r   rM   r  r3   r2   r   r   r)  r   r  r   r  rj  rk  r   r   r	  )r  r  Zkwargs_copyr  r  r  Zstripped_modelr*  r  r-  r  Zreplace_model_in_jsonl_boolr  rn  r  r  r  r   r   create_file_for_deployment  s   


(


z8Router._acreate_file.<locals>.create_file_for_deploymentNo healthy deployments found.)r   r;  model_file_id_mappingr/  , r  r  rx   )!litellm.router_utils.common_utilsr*  r   r  r   async_get_healthy_deploymentsr  r  rN   r   r   r  r  r  r  r   r,  rs  r   r   )r  r   r  r*  r   r1  tasksr  r;  r3  Zreturned_responser  r   r0  r   r(    sR   

X
zRouter._acreate_filec              
      s  z|du rddl m} | j|dd}|di |I dH W S ddlm} t|}| j|dddg|d	d|d
I dH }|d  }|d }	| j||dd | j	||d}
| j
|	  d7  < t|d d\}}}}|di i ||| j|
d|}| j||dd}|durt|tjr|4 I dH  | j||dI dH  |I dH }W d  I dH  n1 I dH sw   Y  n| j||dI dH  |I dH }| j|	  d7  < td|	 d |W S  ty } ztd| dt| d |dur| j|  d7  < |d}~ww )z
        Create a vector store for a specific model.

        Args:
            model: Model name from router config
            **kwargs: Vector store creation parameters

        Returns:
            VectorStoreCreateResponse
        Nr   rS  avector_store_creater3  r  zvector-store-api-fake-textr?  r  r  r   r   r  r  rx   rp  r.  re  rf  rd  z#litellm.avector_store_create(model=r  r  r  r   )rQ  rT  r0  Zlitellm.vector_storesr   rc  r  r  r  ri  r   rs   r   r  r   r  rj  rk  r   r   r	  r  rs  r   r   )r  r   r  rT  Z
factory_fnZavector_store_create_sdkr  r  r  r  r  r  r*  r  rn  r  r   r   r   r8  3  s   



(


zRouter.avector_store_createc                 C   s   dS )aE  
        Override factory-generated vector store methods with router-aware implementations.
        This is called after _initialize_vector_store_endpoints() to ensure our custom
        methods that handle deployment selection and credential injection are used instead
        of the generic factory-generated ones.
        Nr   r1  r   r   r   r    s   	z0Router._override_vector_store_methods_for_routerc                    s   z-||d< | j |d< |d| j|d< tdd}| j|||d | jdi |I d H }|W S  tyI } ztt	| |t
 |d |d }~ww )	Nr   r  r   _acreate_batchrz  r  r  r   )r9  r   r   r1   r  r  r  r  r  rD   r  r  r  r   r  ro  r  r  r   r   r   acreate_batch  s6   
zRouter.acreate_batchc                      zt d| d|  t|}| j|dddg|dd |dI d H }|d  }|d	 }| j||d
d | j||d}| j|  d7  < t	|d	 d\}}	}}t
jdi i ||	| j|d|}
| j||dd}|d urt|tjr|4 I d H  	 | j||dI d H  |
I d H }
W d   I d H  n1 I d H sw   Y  n| j||dI d H  |
I d H }
| j|  d7  < t d| d |
W S  ty } z t d| d| dt| d |d ur| j|  d7  < |d }~ww )Nz Inside _acreate_batch()- model: r  r  r+  r?  r  r  r   r   r9  r  r  rx   rp  r.  re  rf  rd  zlitellm.acreate_batch(model=r  zlitellm._acreate_batch(model=r4  r  r  r   )r   r  r   rc  r  r  r  ri  r   rs   r   r;  r   r  r   r  rj  rk  r   r	  r  rs  r   r   r  r   r  r  r  r  r  r  r  r*  r  rn  r  r   r   r   r9       



(


zRouter._acreate_batchc                    sv  zt  }|dur j|dddg dd |dI dH }n }|du r,tdg dtf fd	d
|dur[t|tr[t|dkr[t	j
fdd|D ddiI dH }n|durot|trott|I dH }ntd|durt|tr|W S t|tr|D ]}t|tr|  W S qrd td ty } zt	t t |d |d}~ww )z
        Iterate through all models in a model group to check for batch

        Future Improvement - cache the result.
        Nr  zretrieve-api-fake-textr?  r  r,  Router not yet initialized.r  c           	   
      s   z]ddl m} | d d}| d  }|d}|d u r%td|  |s0t|d\}}}}| }jtt| |dd	 |	dd  |	dd  t
jd
i i |d|i|I d H W S  ty~ } zdd l}|  | W Y d }~d S d }~ww )Nr   r  r   r   r*  z2Model not found in litellm_params for deployment: rp  aretrieve_batchr  r   )r  r  r   r  r  rs   r  r   r  r  r   r@  r  	print_excr   )	r  r  r   r  r*  r  Z
new_kwargsr  r  )r  receieved_exceptionsr  r   r   try_retrieve_batch*  sL   

z2Router.aretrieve_batch.<locals>.try_retrieve_batchr   c                    s   g | ]	} t t|qS r   )r   rX   r%  r   rC  r   r   r'  X  s    z*Router.aretrieve_batch.<locals>.<listcomp>return_exceptionsTr2  z7Unable to find batch in any model. Received errors - {}r  )r   r6  r  r  r  rX   r   r   r  r  r  r  r   ri   r
  r  rD   r  r  )r  r   r  r  filtered_model_listresultsr  r  r   )r  rB  r  rC  r   r@    s|   




)





zRouter.aretrieve_batchc                    s   z-||d< | j |d< |d| j|d< tdd}| j|||d | jd	i |I dH }|W S  tyI } ztt	| |t
 |d |d}~ww )
zZ
        Cancel a batch through the router with proper model-to-provider mapping.
        r   r  r   _acancel_batchrz  r  Nr  r   )rI  r   r   r1   r  r  r  r  r  rD   r  r  r:  r   r   r   acancel_batch  s6   
zRouter.acancel_batchc                    r<  )Nz Inside _acancel_batch()- model: r  r  zbatch-api-fake-textr?  r  r  r   r   rI  r  r  rx   rp  r.  re  rf  rd  zlitellm.acancel_batch(model=r  zlitellm._acancel_batch(model=r4  r  r  r   )r   r  r   rc  r  r  r  ri  r   rs   r   rJ  r   r  r   r  rj  rk  r   r	  r  rs  r   r   r=  r   r   r   rI    r>  zRouter._acancel_batchc                    s   | j |d}|du rtddtf fddtjfdd|D  I dH }d	g ddd
d}|D ]3}|durd|d du rIt|drIt|d|d< t|d|d< |d |j t|dd
du rdd|d< q1|S )zQ
        Return all the batches across all deployments of a model group.
        r  Nr?  r   c                    s<   zt jdi i | d  I d H W S  ty   Y d S w )Nr   r   )r   alist_batchesr  rp  r  r   r   rC    s   z0Router.alist_batches.<locals>.try_retrieve_batchc                    s   g | ]} |qS r   r   rD  rE  r   r   r'    r  z(Router.alist_batches.<locals>.<listcomp>r   F)objectr  first_idlast_idhas_morerM  rN  r  rO  T)	r  r  rX   r  r  r"  r!  extendr  )r  r   r  rG  rH  Zfinal_resultsr  r   )r  rC  r   rK    s0   	

zRouter.alist_batchesr*  c                    s   | j |dd|d |dr?| j|d dr?| j|d |dI d H }|d d |d< |d  }| j||d || |di |I d H S )	Nr   r  r  r  )r   r  r   r  r   )r  r   r  rc  r  r  r   )r  r  r*  r  r  r  r   r   r   )_pass_through_moderation_endpoint_factory*  s$   

z0Router._pass_through_moderation_endpoint_factory
assistantsr4  )TrR  r2  r5  r8  r=  r>  r;  r<  r?  r9  r:  rA  rB  rC  rD  rE  rF  rG  rH  rI  r@  r6  rn  rm  ro  rN  r8  r`  rb  rd  rf  rh  rj  rO  rP  ra  rc  re  rg  ri  rk  rq  rr  rK  rM  r7  rv  r{  rw  r|  ry  r~  ru  rz  rx  r}  r  r  r  r  r  r  r  r  aupload_container_fileZupload_container_filealist_container_filesZlist_container_filesaretrieve_container_fileZretrieve_container_fileadelete_container_fileZdelete_container_filer  r  r  r  r  r  r  r  r  r  r  r  c                    s    dv r		ddt t dt t ffdd}|S  dv r1		ddt t dt t ffdd	}|S 		ddt t dt t f fd
d}|S )z
        Creates appropriate wrapper functions for different API call types.

        Returns:
            - A synchronous function for synchronous call types
            - An asynchronous function for asynchronous call types
        )r;  rn  ro  rO  rP  rr  rM  r{  r|  r~  rz  r}  r  r  r  r  Nr*  r  c                    s   j dd i|S )Nr  r   )r   r*  r  r  )r  r  r   r   sync_wrapper  s
   z-Router.factory_function.<locals>.sync_wrapper)ra  rc  re  rg  ri  rk  c                    s    d| |d|S )Nr*  r  r   r   rW  )r  r   r   vector_store_file_sync_wrapper  s   z?Router.factory_function.<locals>.vector_store_file_sync_wrapperc                    s   dkrj d| |d|I d H S  dkr%jddi|I d H S  dv r6jddi|I d H S  dv rHjd| d|I d H S  dv rYjddi|I d H S  d	v rkjd| d|I d H S  d
kr}jddd|I d H S  dv rjddi|I d H S  dv rjd| d|I d H S  dv rjd| |d|I d H S  dv rňjd| d|I d H S d S )NrR  )r  r*  r  r2  r  rs  )r`  rb  rd  rf  rh  rj  )r  r*  )r5  r8  rA  rB  rC  rD  rE  rF  rG  rH  r6  rm  rq  rr  rv  rw  ry  ru  rx  r  r  r  r  r  r  )	r  r  r  r  rS  rT  rU  rV  Z aretrieve_container_file_contentrI  T)r  r  )r<  r=  r>  r?  r@  )rN  r8  )r9  r:  )r  r  r  r   ))_pass_through_assistants_endpoint_factoryrQ  r	   _init_vector_store_api_endpointsr  _init_containers_api_endpoints_init_responses_api_endpoints _init_interactions_api_endpointsrW  r4  r  r  r   r   async_wrapper  s   z.Router.factory_function.<locals>.async_wrapperNN)r   r   r   )r  r  r4  rX  rZ  ra  r   r`  r   r0  D  s4   b	
}zRouter.factory_functionc                    sN   |rd|vr||d< | dr| jdd|i|I dH S |di |I dH S )z
        Initialize the Vector Store API endpoints on the router.

        If a model is provided in kwargs, use model-based routing to get
        the deployment credentials. Otherwise, call the original function directly.
        r*  r   r  Nr   )r   r  r  r  r*  r  r   r   r   r\  Z  s   
z'Router._init_vector_store_api_endpointsc                    s*   |rd|vr||d< |di |I dH S )z
        Initialize the Containers API endpoints on the router.

        Container operations don't need model-based routing, so we call the
        original function directly with the custom_llm_provider.
        r*  Nr   r   rc  r   r   r   r]  s  s   z%Router._init_containers_api_endpointsc                    sH   ddl m} ||d}|dur||d< | jdd|i|I dH S )z
        Initialize the Responses API endpoints on the router.

        GET, DELETE, CANCEL Responses API Requests encode the model_id in the response_id, this function decodes the response_id and sets the model to the model_id.
        r   )ResponsesAPIRequestUtilsZresponse_idNr   r  r   )Zlitellm.responses.utilsrd  Zget_model_id_from_response_idr   r  )r  r  r  rd  model_idr   r   r   r^    s   
z$Router._init_responses_api_endpointsc                    s:   |rd|vr||d< d|vrd|d< |di |I dH S )z
        Initialize the Interactions API endpoints on the router.

        GET, DELETE, CANCEL Interactions API Requests don't need model-based routing,
        so we call the original function directly with the custom_llm_provider.
        r*  geminiNr   r   rc  r   r   r   r_    s   z'Router._init_interactions_api_endpointsr  c                    sR   |du r| j dur| j d }|| j d  ntd|d||d|I dH S )z@Internal helper function to pass through the assistants endpointNr*  r   z'custom_llm_provider' must be set. Either via:
 `Router(assistants_config={'custom_llm_provider': ..})` 
or
 `router.arun_thread(custom_llm_provider=..)`rY  r   )r   r   r  )r  r  r*  r  r  r   r   r   r[    s   

z0Router._pass_through_assistants_endpoint_factoryr  rE  rF  c	                    sJ  t dt   |}	d}
|d}d}|du s|du r || |	d|}d|vr0| j|d< d|vr8d	|d< zt d
 t|d}|r[|||d t	|i |I dH }|W S t
|tjr|dur| j||d}|du rr|	|||d t	|i |I dH }|W S d|||}t jd|d | jd|7  _nFt
|tjr|dur| j||d}|du r|	|||d t	|i |I dH }|W S d|||}t jd|d | jd|7  _|durO|durOt d|  t|tt|d\}
}|
du r|dur|| d }
|
du r:t d| d|  t|	dr8|	 jd| d| 7  _|	||
|d t	|i |I dH }|W S W n0 ty } z#t|}t dt|t t| |dI dH  t|}W Y d}~nd}~ww t|	dr|	 jd||
7  _t|d	kr|	 jd|7  _|	)zD
        Common utilities for async_function_with_fallbacks
        	TracebackNr   r  T)Zlitellm_routerr  r   Zfallback_depthr   zTrying to fallback b/w models)r   )fallback_model_grouporiginal_model_group)r   rF  zmodel={}. context_window_fallbacks={}. fallbacks={}.

Set 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbackszGot 'ContextWindowExceededError'. No context_window_fallback set. Defaulting                         to fallbacks, if available.{})msgz
{}zmodel={}. content_policy_fallback={}. fallbacks={}.

Set 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbackszGot 'ContentPolicyViolationError'. No content_policy_fallback set. Defaulting                         to fallbacks, if available.{}zinside model fallbacks: r   z7No fallback model group found for original model_group=z. Fallbacks=r  zlitellm.router.py::async_function_with_fallbacks() - Error occurred while trying to do fallbacks - {}
{}

Debug Information:
Cooldown Deployments={}r  r  z<. Received Model Group={}
Available Model Group Fallbacks={}z
Error doing the fallback: {})r   r  r  r  r   r   r	  r?   r   rA   r   r   ContextWindowExceededError(_get_fallback_model_group_from_fallbacksr
  r  r  r@   r   r   r"  r  r   r  r<   r  )r  r  rE  r   r   r   rF  r  r  r  rh  ri  Zfallback_failure_exception_strr  Zis_non_standard_fallback_formatr  Z#context_window_fallback_model_grouperror_messageZ#content_policy_fallback_model_groupZgeneric_fallback_idxZnew_exceptionr  r   r   r   rO    s.  






z1Router.async_function_with_fallbacks_common_utilsc                    s  | d}|dd}| d| j}| d| j}| d| j}|dd}z@| j|||||d	 |durF| j|i |d|iI dH }	n| j|i |I dH }	tt	j
r_td
|	  t|	dd}	|	W S  ty }
 z| |
|||||||I dH W  Y d}
~
S d}
~
ww )z
        Try calling the function_with_retries
        If it fails after num_retries, fall back to another model group
        r   rE  Fr   r   r   mock_timeoutN)r  rF  r   r   r   zAsync Response: r   )r  Zattempted_fallbacks)r   r  r   r   r   _handle_mock_testing_fallbacksasync_function_with_retriesr   isEnabledForr   r   r  r/   r  rO  )r  r  r  rF  rE  r   r   r   ro  r  r  r   r   r   r    s`   
z$Router.async_function_with_fallbacksc                 C   s   t |}|jdur|jdu rtj|dd| d| d|jdur5|jdu r5tj|dd| d| d|jdurM|jdu rOtj|dd| d| ddS dS )	a  
        Helper function to raise a litellm Error for mock testing purposes.

        Raises:
            litellm.InternalServerError: when `mock_testing_fallbacks=True` passed in request params
            litellm.ContextWindowExceededError: when `mock_testing_context_fallbacks=True` passed in request params
            litellm.ContentPolicyViolationError: when `mock_testing_content_policy_fallbacks=True` passed in request params
        NTr  #This is a mock exception for model=z#, to trigger a fallback. Fallbacks=)r   r  r  zF, to trigger a fallback.                     Context_Window_Fallbacks=zF, to trigger a fallback.                     Context_Policy_Fallbacks=)	r[   Zfrom_kwargsZmock_testing_fallbacksr   ZInternalServerErrorZmock_testing_context_fallbacksrl  Z%mock_testing_content_policy_fallbacksr  )r  r  rF  r   r   r   Zmock_testing_paramsr   r   r   rp    s8   






z%Router._handle_mock_testing_fallbacksc                    s  t d |d}|d| j}t|}|d| j}|d| j}|d| j}|d}	|d}
|d	|d
p?i }d|v r`t	|d t
r`| j|d d}|d ur`|dt|i t d| d|
  d|d< |
|d< z | j|	|d | j|g|R i |I d H }t|dd d}|W S  ty } z4d }|}t|dd }|d urt	|tr|}
	 | j|dpd|dI d H \}}d}| jd us|d ur|	p|dp|d}t|||| jd}|d ur|}
d}|s| j||||||d |
|d< |
dkr
| j||d}n t d|
  | j||
|
||d}t|I d H  t|
D ]}z8|d |d< |
|d< | j|g|R i |I d H }t|rP|I d H }t||d |
d}|W   W  Y d }~S  ty } z?| j||d}|
| d }|d}|d ur| j||dI d H \}}ng }| j|||
||d}t|I d H  W Y d }~q)d }~ww t|t j!v rt"|d|
 |d ur|d n|
}t"|d| |d }~ww )Nz#Inside async function with retries.r  r   r   r   r   r   r   r  r   rF  r  Zmodel_group_sizez/async function w/ retries: original_function - z, num_retries - r   attempted_retriesr   )rF  r  )r  rt  r   r  r  Frs  rF  r   r   T)r  r   all_deploymentsr   regular_fallbacksr   )r  r  z#Retrying request with num_retries: )r  remaining_retriesr   r   rv  rx   )#r   r  r  r   r   r   r   r   r   r   r   r  r   r  %_handle_mock_testing_rate_limit_error	make_callr0   r  r!  r   r  r   "_get_num_retries_from_retry_policyshould_retry_this_error	log_retry_time_to_sleep_before_retryr  r  r  r    is_async_callabler   r   ZLITELLM_EXCEPTION_TYPESr  )r  r  r  r  r   r  r   r   r   rF  r   	_metadatar   r  r  Zcurrent_attemptr  Zdeployment_num_retriesr  _all_deploymentsZ_retry_policy_appliesZ_model_group_for_retry_policyZ_retry_policy_retriesr   rx  _modelr  _timeoutZactual_retries_attemptedr   r   r   rq    s  





	




 z"Router.async_function_with_retriesc                    sP   | d}||i |}t|st|r|I dH }| j||dI dH }|S )z^
        Handler for making a call to the .completion()/.embeddings()/etc. functions.
        r   N)r  rF  )r   r    r  r  isawaitableset_response_headers)r  r  r  r  rF  r  r   r   r   rz    s   

zRouter.make_callc                 C   s   | dd}| j|d}d}|dur&t|dkr&ttt |d d d}|durC|du rEtd	|  t	j
|d
d| d|ddS dS )z
        Helper function to raise a mock litellm.RateLimitError error for testing purposes.

        Raises:
            litellm.RateLimitError error when `mock_testing_rate_limit_error=True` passed in request params
        mock_testing_rate_limit_errorNr  rx   r   r   r   TzTlitellm.router.py::_mock_rate_limit_error() - Raising mock RateLimitError for model=r  rs  z , to trigger a rate limit error.)r   r  r  r   )r  r  r  r   r   r   r   r   r	  r   RateLimitError)r  r  rF  r  Zavailable_modelsr   r   r   r   ry    s,   	
z,Router._handle_mock_testing_rate_limit_errorr  r   rv  rw  c           
      C   s   d}|durt |trt|}d}|durt |trt|}t |tjr*|dur*|t |tjr6|dur6|t|dd}	|	durKt|	sK|	dvrK|t |tjrS|t |t	j
ri|dkri|durit|dkri|t |t	jrv	 |dkrv||dkr||dS )au  
        1. raise an exception for ContextWindowExceededError if context_window_fallbacks is not None
        2. raise an exception for ContentPolicyViolationError if content_policy_fallbacks is not None

        2. raise an exception for RateLimitError if
            - there are no fallbacks
            - there are no healthy deployments in the same model group
        r   Nstatus_code)i  i  rx   T)r   r   r  r   rl  r  r!  Z_should_retryZNotFoundErrorr   r  AuthenticationError)
r  r  r   rv  r   r   rw  Z_num_healthy_deploymentsZ_num_all_deploymentsr  r   r   r   r|    s@   

zRouter.should_retry_this_errorc                 O   s   t | jg|R i |S )z~
        Sync wrapper for async_function_with_fallbacks

        Wrapped to reduce code duplication and prevent bugs.
        )r   r  )r  r  r  r   r   r   r    s   zRouter.function_with_fallbacksc                 C   s@   |du rdS d}|D ]}t | d |kr|| } |S q
|S )a@  
        Returns the list of fallback models to use for a given model group

        If no fallback model group is found, returns None

        Example:
            fallbacks = [{"gpt-3.5-turbo": ["gpt-4"]}, {"gpt-4o": ["gpt-3.5-turbo"]}]
            model_group = "gpt-3.5-turbo"
            returns: ["gpt-4"]
        Nr   )r   keys)r  r   rF  rh  rS  r   r   r   rm    s   z/Router._get_fallback_model_group_from_fallbacksc                 C   sZ   | j du rdS | j D ] }t|tr*d|v r*|d }t|tr*t|dkr*|d   S q
dS )zX
        Returns the first model from the default_fallbacks list, if it exists.
        Nr   r   )r   r   r  r   r  )r  fallbackZdefault_listr   r   r   _get_first_default_fallback1  s   

z"Router._get_first_default_fallbackrx  c                 C   s   |durt |dkrn|durt|trt |dkrdS d}t|dr-t|jdr-|jj}t|dr5|j}|durEtj|||| j	d}|S tj||| j	d}|S )	z
        Calculate back-off, then retry

        It should instantly retry only when:
            1. there are healthy deployments in the same model group
            2. there are fallbacks for the completion call
        Nrx   r   r  headerslitellm_response_headers)rx  r   response_headersmin_timeout)rx  r   r  )
r  r   r   r"  r  r  r  r   Z_calculate_retry_afterr   )r  r  rx  r   r   rv  r  r   r   r   r   r~  >  s4   
z"Router._time_to_sleep_before_retryc              
      s6  ddl m} z|dd}|du rtd|d ddu r"W dS |d d dd}|d d d	d}|d d
i p?i }	|	dd}
|du sN|
du rQW dS t|
trZt|
}
| j|
d}|du rgW dS | j||d}|dd}|dd}|j	j
}|j	j}|dd}|dd}t| |
d |du r|du r|du r|du r|du r|du rW dS t|}|dd}t }|d}tjjj|
||d}g }||||tjjd tjjj|
||d}|||dtjjd | jj||dI dH  |W S  ty } ztdt| W Y d}~dS d}~ww )zG
        Track remaining tpm/rpm quota for model in model_list
        r   )RedisPipelineIncrementOperationr  Nzstandard_logging_object is Noner   r   r  rF  ry  r{  re  r  received_model_nametpmrpmr  deployment_idtotal_tokens%H-%Mr{  current_minuter   )rv  Zincrement_valuer   rx   )Zincrement_listr  zOlitellm.router.Router::deployment_callback_on_success(): Exception occured - {})Zlitellm.types.cachingr  r   r*  r   r   r   get_deploymentget_router_model_infor   r  r  rI   r   rv   strftimer_   TPMr$  r
  r   r   r   RPMr   Zasync_increment_cache_pipeliner  r   r  )r  r  completion_responser	  r
  r  r  deployment_namerF  ry  r{  deployment_infoZdeployment_model_infor  r  Ztpm_litellm_paramsZrpm_litellm_paramsZtpm_model_infoZrpm_model_infor  r  dtr  Ztpm_keyZpipeline_operationsrpm_keyr  r   r   r   r   q  s   

	z%Router.deployment_callback_on_successc           	      C   s   d}|d  ddu rn-|d d  dd}|d  di pi }| dd}|du s.|du r0dS t|tr9t|}|durEt| |d}|S dS )z
        Tracks the number of successes for a deployment in the current minute (using in-memory cache)

        Returns:
        - key: str - The key used to increment the cache
        - None: if no key is found
        Nr   r   rF  ry  r{  r  )r   r   r   r   rI   )	r  r  r  r	  r
  r{  rF  ry  rv  r   r   r   r    s"   
z*Router.sync_deployment_callback_on_successc              
   C   s  t d zw|dd}t|dd}|di }|di }tjjj|d}	|d	d}
d}|	dur9tjj	|	d
}|
durD|
dkrD|
}n|durO|dkrO|}n| j
}t|tru|d}|du rcW dS t| |d t| ||||d}|W S t d W dS  ty } z|d}~ww )a`  
        2 jobs:
        - Tracks the number of failures for a deployment in the current minute (using in-memory cache)
        - Puts the deployment in cooldown if it exceeds the allowed fails / minute

        Returns:
        - True if the deployment should be put in cooldown
        - False if the deployment should not be put in cooldown
        z1Router: Entering 'deployment_callback_on_failure'rs  Nr  r  r   ry  )r  r   )r  r   r{  Fr  r  exception_statusr  r  Ztime_to_cooldownzWRouter: Exiting 'deployment_callback_on_failure' without cooldown. No model_info found.)r   r  r   r!  r   Zlitellm_core_utilsZexception_mapping_utilsZ_get_response_headersutilsZ&_get_retry_after_from_exception_headerr   r   r  rH   r>   r  )r  r  r  r	  r
  rs  r  r   _model_infoZexception_headersZdeployment_cooldownZheader_cooldownZ_time_to_cooldownr  r  r  r   r   r   r    sX   
	

z%Router.deployment_callback_on_failurer  c                    s   |d d  dd}|d d  dd}|d  di pi }| dd}|du s-|du r/dS t|tr8t|}t|}	t }
|
d}tjj	j
|||d	}| jj|d
|	tjj	dI dH  dS )z3
        Update RPM usage for a deployment
        r   r   r  NrF  ry  r{  r  r  rx   )rv  r$  r  r   )r   r   r   r   r   rv   r  r_   r  r$  r
  r   Zasync_increment_cacher   r   )r  r  r  r	  r
  r  rF  ry  r{  r  r  r  r  r   r   r   r  b  s2   
z+Router.async_deployment_callback_on_failure)r   r  c                 C   s   t |S )a  
        Helper to return what the "metadata" field should be called in the request data

        - New endpoints return `litellm_metadata`
        - Old endpoints return `metadata`

        Context:
        - LiteLLM used `metadata` as an internal field for storing metadata
        - OpenAI then started using this field for their metadata
        - LiteLLM is now moving to using `litellm_metadata` for our metadata
        )r   )r  r  r   r   r   '_get_metadata_variable_name_from_kwargs  s   z.Router._get_metadata_variable_name_from_kwargsc           	   
   C   s   zfd|v rdnd}t |jt|d}| D ]2\}}||ddfvr'|||< q||krIt|trIi ||< ||  D ]\}}|dkrH||| |< q:qt| jdkrW| jd | j	| | j|| d< |W S  t
yt } z|d	}~ww )
z
        When a retry or fallback happens, log the details of the just failed model call - similar to Sentry breadcrumbing
        r  r   )Zexception_typeZexception_stringr  r  r      r   N)r   r   r   r  r   r  r  r   r  r   r  )	r  r  r  Z_metadata_varZprevious_modelr  r  Z
metadata_kZ
metadata_vr   r   r   r}    s6   
zRouter.log_retryr  r  c                 C   sX   |}| j j||dd}|du rd}| j j||ddd |S |d7 }| j j||dd |S )zf
        Update deployment rpm for that minute

        Returns:
        - int: request count
        T)rv  r  
local_onlyNrx   r   )rv  r$  r  r   )rv  r$  r  )r   	get_cacheZ	set_cache)r  r  r  r  Zrequest_countr   r   r   _update_usage  s   		zRouter._update_usagec                 C   s6   | j d u rdS | j D ]}t|trd|v r dS q
dS )NFr   T)r   r   r  )r  r  r   r   r   _has_default_fallbacks  s   


zRouter._has_default_fallbacksr  c                 C   s   |j rt|j dkr|j d jdkrdS |d| j}|dur=d}|D ]}t| d |kr5|| } nq#|dur<dS n|  rCdS t	d
|| dS )z
        Determines if a content policy error should be raised.

        Only raised if a fallback is available.

        Else, original response is returned.
        r   Zcontent_filterFr   NTzyContent Policy Error occurred. No available fallbacks. Returning original response. model={}, content_policy_fallbacks={})choicesr  Zfinish_reasonr   r   r   r  r  r   r  r
  )r  r   r  r  r   rh  rS  r   r   r   r    s0   
z)Router._should_raise_content_policy_errorc                 C   sz   g }z| j |d\}}t|trg W S W n	 ty   Y nw t| |d}g }|D ]}|d d |v r3q(|| q(||fS )Nrp  rk  ry  r{  )#_common_checks_available_deploymentr   r  r  r=   r   )r  r   r  r  r  unhealthy_deploymentsr   r  r   r   r   _get_healthy_deployments  s(   

zRouter._get_healthy_deploymentsc           	         s   g }z| j |d\}}t|trg |fW S W n	 ty    Y nw t| |dI dH }t|}g }|D ]}|d d |vrA|| q2||fS )z
        Returns Tuple of:
        - Tuple[List[Dict], List[Dict]]:
            1. healthy_deployments: list of healthy deployments
            2. all_deployments: list of all deployments
        rp  rk  Nry  r{  )r  r   r  r  r;   setr   )	r  r   r  r  r  r  Zunhealthy_deployments_setr   r  r   r   r   r    s,   	



z%Router._async_get_healthy_deploymentsc                 C   s$   t jD ]}t|tr|| qdS )a  
        Mimics 'async_routing_strategy_pre_call_checks'

        Ensures consistent update rpm implementation for 'usage-based-routing-v2'

        Returns:
        - None

        Raises:
        - Rate Limit Exception - If the deployment is over it's tpm/rpm limits
        N)r   r  r   r   r  )r  r  r  r   r   r   r  8  s
   


z'Router.routing_strategy_pre_call_checksr+  c                    s  t jD ]}t|trz|||I dH  W q t jyU } z2|dur@t|j|t	
 t d tj|j|t	
 fd  t| |j||d d | jd |d}~w ty } z#|dur}t|j|t	
 t d tj|j|t	
 fd  |d}~ww qdS )O  
        For usage-based-routing-v2, enables running rpm checks before the call is made, inside the semaphore.

        -> makes the calls concurrency-safe, when rpm limits are set for a deployment

        Returns:
        - None

        Raises:
        - Rate Limit Exception - If the deployment is over it's tpm/rpm limits
        Nrs  traceback_exceptionr
  r  r  ry  r{  r  )r   r  r   r   Zasync_pre_call_checkr  r  r  async_failure_handlerr  r  r  r  r  failure_handlerr  r>   r  r   r  )r  r  r  r+  r  r  r   r   r   rk  H  s^   




z-Router.async_routing_strategy_pre_call_checksr  c           
         s   |}t jD ]F}t|trLz|j|||||dI dH }W q tyK }	 z#|durEt|j|	t	
 t d tj|j|	t	
 fd  |	d}	~	ww q|S )r  r   r   r  r  r  Nr  r  )r   r  r   r   Zasync_filter_deploymentsr  r  r  r  r  r  r  r  r  r  r  )
r  r   r   r  r  r  r+  Zreturned_healthy_deploymentsr  r  r   r   r   !async_callback_filter_deployments  s@   


	
z(Router.async_callback_filter_deploymentsr   c                 C   s   |g}|  D ]D\}}t|tr|| nt|tr$|t| n|t| t|tr6|| qt|trD|t| q|t| qd|}t	|
 }| S )z
        Helper function to consistently generate the same id for a deployment

        - create a string from all the litellm params
        - hash
        - use hash as id
        r  )r  r   r   r   r  r!  dumpsjoinhashlibsha256encode	hexdigest)r  rF  r   partsr  r  Z
concat_strZhash_objectr   r   r   r~    s   





zRouter._generate_model_idr  _model_name_litellm_paramsr  c              
      sr  zt di |}tdi ||||d}tj D ]}|j|dur+|j| ||< q|jj}|dur<t	j
||id |jj}|jjdurN|jjd | }tj   fdd| D }	t	j
||	id | j|ddurtd	|j d
|jd   W dS | j|d}|jdd}
| j|
|jjd |W S  ty } z| jrtd| d W Y d}~dS |d}~ww )a^  
        Create a deployment object and add it to the model list

        If the deployment is not active for the current environment, it is ignored

        Returns:
        - Deployment: The deployment object
        - None: If the deployment is not active for the current environment (if 'supported_environments' is set in litellm_params)
        r}  N
model_costr  c                    r  r   r   r  Z_custom_pricing_fieldsr   r   r  	  r  z-Router._create_deployment.<locals>.<dictcomp>r  TzIgnoring deployment z% as it is not active for environment supported_environmentsr   r   re  zError creating deployment: 1, ignoring and continuing with other deployments.r   )rZ   rW   rg   model_fieldsr  r   r   ry  r{  r   register_modelr   r*  r  $deployment_is_active_for_environmentr   r   r  _add_deploymentto_json _add_model_to_list_and_index_mapr  r   rs  )r  r  r  r  r  r   r  fieldre  Z_shared_model_infor   r  r   r  r   _create_deployment  sj   

	


zRouter._create_deploymentc                 C   s$   |j drdS |j drdS dS )z
        Check if the deployment is an auto-router deployment (semantic router).

        Returns True if the litellm_params model starts with "auto_router/"
        but NOT "auto_router/complexity_router" (which uses complexity routing).
        auto_router/complexity_routerFzauto_router/Tr   r  r  r   r   r   r   _is_auto_router_deployment-  s
   z!Router._is_auto_router_deploymentc                 C   s   ddl m} |jj}|jj}|du r|du rtd|jj}|du r&td|jj}|du r2td||j||||| d}|j| j	v rLtd|j d	|| j	|j< dS )
z
        Initialize the auto-router deployment.

        This will initialize the auto-router and add it to the auto-routers dictionary.
        r   )r{   Nzzauto_router_config_path or auto_router_config is required for auto-router deployments. Please set it in the litellm_paramszfauto_router_default_model is required for auto-router deployments. Please set it in the litellm_paramszhauto_router_embedding_model is required for auto-router deployments. Please set it in the litellm_params)r  auto_router_config_pathauto_router_configdefault_modelembedding_modelr  zAuto-router deployment 3 already exists. Please use a different model name.)
/litellm.router_strategy.auto_router.auto_routerr{   r   r  r  r*  Zauto_router_default_modelZauto_router_embedding_modelr  r   )r  r  r{   r  r  r  r  Zautor_routerr   r   r   init_auto_router_deployment:  sB   z"Router.init_auto_router_deploymentc                 C   s   |j drdS dS )z
        Check if the deployment is a complexity-router deployment.

        Returns True if the litellm_params model starts with "auto_router/complexity_router"
        r  TFr  r  r   r   r    _is_complexity_router_deploymenti  s   z'Router._is_complexity_router_deploymentc                 C   s   ddl m} |jj}|jj}|du r$|r$|di }|dp#|d}|du r,td||j|| |d}|j| jv rDtd	|j d
|| j|j< dS )z
        Initialize the complexity-router deployment.

        This will initialize the complexity-router and add it to the complexity-routers dictionary.
        r   r}   NtiersZMEDIUMSIMPLEzcomplexity_router_default_model is required for complexity-router deployments, or configure tiers in complexity_router_config. Please set it in the litellm_params)r  r  r  complexity_router_configzComplexity-router deployment r  )	;litellm.router_strategy.complexity_router.complexity_routerr~   r   r  Zcomplexity_router_default_modelr   r*  r  r   )r  r  r~   r  r  r  Zcomplexity_routerr   r   r   !init_complexity_router_deployments  s.   	z(Router.init_complexity_router_deploymentc                 C   s   |j du sd|j vs|j d du rdS tdd}|du r td|tvr.tdt d| |j d D ]}|tvrFtd	t d| d
| q3||j d v rPdS dS )a  
        Function to check if a llm deployment is active for a given environment. Allows using the same config.yaml across multople environments

        Requires `LITELLM_ENVIRONMENT` to be set in .env. Valid values for environment:
            - development
            - staging
            - production

        Raises:
        - ValueError: If LITELLM_ENVIRONMENT is not set in .env or not one of the valid values
        - ValueError: If supported_environments is not set in model_info or not one of the valid values
        Nr  TZLITELLM_ENVIRONMENT)Zsecret_namezPSet 'supported_environments' for model but not 'LITELLM_ENVIRONMENT' set in .envz#LITELLM_ENVIRONMENT must be one of z. but set as: z&supported_environments must be one of z for deployment: F)ry  r   r*  rQ   )r  r  Zlitellm_environmentZ_envr   r   r   r    s,   


z+Router.deployment_is_active_for_environmentc                 C   s0  t |}g | _i | _i | _|   |   |D ]k}|d}|d}t|t	rB|
 D ]\}}t|trA|drAt|||< q-|di }d|vrV| ||}	|	|d< |dd d urzt|d trz|d D ]}
|
|d< | j||||d qiq| j||||d qtd|    d	d
 |D | _d S )Nr  r   os.environ/ry  r{  Zorganization)r  r  r  r  z
Initialized Model List c                 S   s   h | ]}|d  qS r  r   r  r   r   r   	<setcomp>  r  z(Router.set_model_list.<locals>.<setcomp>)r  r  r   r   r   "_invalidate_model_group_info_cache_invalidate_access_groups_cacher  r   r  r  r   r  ru   r~  r   r   r  r   r  get_model_namesr   )r  r   Zoriginal_model_listr   r  r  r  r  r  Z_idorgr   r   r   r     sP   



	zRouter.set_model_listc                 C   sV  dd l }|jj}d}d|v r|dd }|tjv rd}|r'|}d }d }d }	n#tj|jj|jdd d\}}}}	|tjvrJt	
|sJtd| | j|jj |jjd u rgt|dd d urgt|d|j_|jjd u r|t|d	d d ur|t|d	|j_d
|jv r| j|j|jdd |jjr| j|jj |jd}
|jd}|
d ur|d urd
|v r|
| jvrt | j|
< | j|
 ||jdd |jdg pg }|D ])}|di }dD ]}||v r|| dr|| dd}|j|d||< qq|d ur| j|||jjd | j|jdr| j|d | j |jdr)| j!|d |S )Nr   Fr  Tr*  r   r*  zUnsupported provider - r  r  r   r   team_idteam_public_model_nameZdataSources
parameters)r  rv  r  r  )r  r*  r   )r   r  )"osr   r   r  r   r  rs   r   Zprovider_listr%   existsr  r   r   r  r!  r  r  r   Zadd_patternr  ry  r{  r   r   ry   r  r  environ'_initialize_deployment_for_pass_throughr  r  r  r  )r  r  r  r  Zis_prompt_management_modelr  r  r*  r  r  Z_team_idZ_team_public_model_nameZdata_sourcesZdata_sourcer   Z	param_keyenv_namer   r   r   r    s   






	zRouter._add_deploymentc                 C   s   |j jdu rkddlm} |j jdurt|j j}ni }|dkrP|dp(|j j}|dp1|j j	}|dp:|j j
}|du sC|du rGtd	|j|||d
 n|dpX|j j}	|dpa|j j}
|j||	|
d 	 dS )a*  
        Optional: Initialize deployment for pass-through endpoints if `deployment.litellm_params.use_in_pass_through` is True

        Each provider uses diff .env vars for pass-through endpoints, this helper uses the deployment credentials to set the .env vars for pass-through endpoints
        Tr   )passthrough_endpoint_routerNZ	vertex_aivertex_projectvertex_locationvertex_credentialsz]vertex_project, and vertex_location must be set in litellm_params for pass-through endpoints.)Z
project_idlocationr  r  r  )r*  r  r  )r   use_in_pass_throughZ>litellm.proxy.pass_through_endpoints.llm_passthrough_endpointsr  r  r!   get_credential_valuesr   r  r  r  r*  Zadd_vertex_credentialsr  r  Zset_pass_through_credentials)r  r  r*  r   r  credential_valuesr  r  r  r  r  r   r   r   r  {  sN   




z.Router._initialize_deployment_for_pass_throughc                 C   s   |j j}|r| |rdS |jdd}| j|d |j j}|durF|j jdd}tj D ]}|j	
|}|dur=|||< q-tj||id | j||j jd | j|j |S )z
        Parameters:
        - deployment: Deployment - the deployment to be added to the Router

        Returns:
        - The added deployment
        - OR None (if deployment already exists)
        NTr   r  r  r  )ry  r{  r  r  r  r  rg   r  r  r   r   r   r  r  r   addr  )r  r  _deployment_model_id_deploymentr  Z_model_info_dictr  Zfield_valuer   r   r   add_deployment  s*   zRouter.add_deploymentre  removal_idxc                 C   s   | j  D ]\}}||kr|d | j |< q|| j v r| j |= t| j D ]5\}}||v r2|| g }|D ]}||krD||d  q6|| q6t|dkrV|| j|< q%| j|= q%dS )a"  
        Helper method to update deployment indices after a deployment has been removed from model_list.

        Parameters:
        - model_id: str - the id of the deployment that was removed
        - removal_idx: int - the index where the deployment was removed from model_list
        rx   r   N)r   r  r   r   r  r   r  )r  re  r  r  r  r  indicesZupdated_indicesr   r   r   (_update_deployment_indices_after_removal  s$   


z/Router._update_deployment_indices_after_removalc                 C   s   t | j}| j| |   |   |dur|| j|< n|di ddur1|| j|d d < |d}|rL|| jvrBg | j|< | j| | dS dS )a!  
        Helper method to add a model to the model_list and update both indices.

        Parameters:
        - model: dict - the model to add to the list
        - model_id: Optional[str] - the model ID to use for indexing. If None, will try to get from model["model_info"]["id"]
        Nry  r{  r  )r  r   r   r  r  r   r   r   )r  r   re  r  r  r   r   r   r    s   




z'Router._add_model_to_list_and_index_mapc              
   C   s   zL|j jpd}| j|d}|durD|j|jkrW dS d}|j j}| j}||v rD|| }|durD| j| |   |   | j	||d | j
|d |W S  tym } z| jrgtd| d W Y d}~dS |d}~ww )z
        Add or update deployment
        Parameters:
        - deployment: Deployment - the deployment to be added to the Router

        Returns:
        - The added/updated deployment
        r  r  Nre  r  r  zError upserting deployment: r  )ry  r{  r  r   r   r   r  r  r  r  r   r  r   r   r  )r  r  r  Z_deployment_on_routerr  r  Zdeployment_fast_mappingr  r   r   r   r  $  s>   	
zRouter.upsert_deploymentr{  c                 C   sl   d}|| j v r| j | }z|dur)| j|}|   |   | j||d |W S W dS  ty5   Y dS w )z
        Parameters:
        - id: str - the id of the deployment to be deleted

        Returns:
        - The deleted deployment
        - OR None (if deleted deployment not found)
        Nr  )r   r   r  r  r  r  r  )r  r{  Zdeployment_idxrS  r   r   r   delete_deploymentW  s    	

zRouter.delete_deploymentc                 C   sZ   || j v r+| j | }| j| }t|trtdi |S t|tr"|S tdt|dS )zl
        Returns -> Deployment or None

        Raise Exception -> if model found in invalid format
        zModel invalid format - {}Nr   )r   r   r   r  rW   r  r
  r   )r  re  r  r   r   r   r   r  s  s   




zRouter.get_deploymentc                 C   s8   | j |d}|du rdS tdi |jjddjddS )zE
        Returns -> dict of credentials for a given model id
        r  NTr   r   )r  rU   r   r  )r  re  r  r   r   r   get_deployment_credentials  s   z!Router.get_deployment_credentialsmodel_group_namec                 C   sb   || j v r/| j | }|r/| j|d  }t|trtdi |S t|tr&|S tdt|dS )z
        Returns -> Deployment or None

        Raise Exception -> if model found in invalid format

        Optimized with O(1) index lookup instead of O(n) linear scan.
        r   zModel Name invalid - {}Nr   )r   r   r   r  rW   r  r
  r   )r  r  r  r   r   r   r   "get_deployment_by_model_group_name  s   



z)Router.get_deployment_by_model_group_namec                 C   s,  | j |d}|du r| j|d}|du r6| j|pg }|r6|d }t|tr/tdi |}nt|tr6|}|du r<dS tdi |jj	ddj	dd}|jj
durqt|jj
}|sftd|jj
 d || |d	d |jjr}|jj|d
< |S d|jjv r|jjdd |d
< |S d|d
< |S )a  
        Get API credentials and provider info from a model name in model_list.
        Useful for passthrough endpoints (files, batches, etc.) that need credentials.

        This method tries to find a deployment by model_id first, and if not found,
        it tries to find by model_group_name (model_name).

        Args:
            model_id: Model ID or model name from model_list (e.g., "gpt-4o-litellm")

        Returns:
            Dictionary containing api_key, api_base, custom_llm_provider, etc.
            Returns None if model not found.

        Example:
            credentials = router.get_deployment_credentials_with_provider("gpt-4o-litellm")
            # Returns: {"api_key": "sk-...", "custom_llm_provider": "openai", ...}
        r  N)r  r   Tr   zCredential 'z' not found in credential_listr  r*  r  r   r   )r  r  r   router   r  rW   rU   r   r  r  r!   r  r   r   r   r  r*  r   r  )r  re  r  potential_wildcard_modelsZdeployment_dictcredentialsr  r   r   r   (get_deployment_credentials_with_provider  sZ   


z/Router.get_deployment_credentials_with_providerrW   r  c                 C      d S r/  r   r  r  r  r{  r   r   r   r       zRouter.get_router_model_infoc                 C   r  r/  r   r  r   r   r   r    r  c              	   C   s  |dur| j |d}|dur|}|du rtd|dpi dd}|du r2|dp-i dd}|}|d}t|trA|}n't|trRd|v rRtdi |}ntdt|j d	|dp`i d
d tj	|j
|d\}	}
}}|
dkr|du rtd|	 d nE|
dkr|	}d|v r| j|}|dur|D ].}z#|dpi d
|dpi d
kr|dpi d}W  nW q ty   Y qw |du r|	}|d|
sd|
|}n|}tj|d}|dpi }|dur|tt| |S )a  
        For a given model id, return the model info (max tokens, input cost, output cost, etc.).

        Augment litellm info with additional params set in `model_info`.

        For azure models, ignore the `model:`. Only set max tokens, cost values if base_model is set.

        Returns
        - ModelInfo - If found -> typed dict with max tokens, input cost, etc.

        Raises:
        - ValueError -> If model is not mapped yet
        Nr  zDeployment not foundry  
base_modelr   r   z.Deployment missing valid litellm_params. Got: z, deployment_id: r{  unknownr   r   Zazurez Could not identify azure model 'z'. Set azure 'base_model' for accurate max tokens, cost tracking, etc.- https://docs.litellm.ai/docs/proxy/cost_tracking#spend-tracking-for-azure-openai-modelsr   z{}/z{}/{}rp  r   )r  r*  r   r   rZ   r  r   r   r   rs   r   r   r  r   r	  r  r  r
  get_model_infor   r   rj   )r  r  r  r{  r  r  r   Zlitellm_params_datar   r  r*  r  Zpotential_modelsZpotential_modelZmodel_info_namery  Zuser_model_infor   r   r   r    sx   


c                 C   s"   || j v r| j | }| j| S dS )a%  
        For a given model id, return the model info

        Returns
        - dict: the model in list with 'model_name', 'litellm_params', Optional['model_info']
        - None: could not find deployment in list

        Optimized with O(1) index lookup instead of O(n) linear scan.
        N)r   r   )r  r{  r  r   r   r   r  b  s   


zRouter.get_model_infoc                 C   s,   | j |d}|du rdS |d }| j|dS )zT
        Return list of all models in the same model group as that model id
        )r{  Nr  r  )r  r  )r  r{  ry  r  r   r   r   get_model_groupr  s
   zRouter.get_model_groupc           	      C   s   ddl m} d}d}d}ztj|}W n	 ty   Y nw ztj|d}W n	 ty/   Y nw z&|durU|dd}|durUtj|d}|durU|pLi }|tt||}W n	 ty_   Y nw |durw|durwtt	|tt|
 |}|S |dur}|}|S )z
        For a given model id, return the model info

        1. Check if model_id is in model info
        2. If not, check if litellm model name is in model info
        3. If not, return None
        r   )_update_dictionaryNrp  r  )litellm.utilsr  r   r  r   r  r  r   r  rj   r  )	r  re  r  r  ry  Zcustom_model_infoZlitellm_model_name_model_infor  Zbase_model_infor   r   r   get_deployment_model_info~  sR   

z Router.get_deployment_model_infouser_facing_model_group_namec                 C   s:  d}d}d}d}| j |d}|du rdS |D ]f}d}	d|v r(|d |kr(d}	nd|v r6| j|dur6d}	|	s9qtdi |d }
|
j}|di }|di }d}|du r]|dd}|du rg|dd}|du rq|dd}d}|du r}|d	d}|du r|d	d}|du r|d	d}z|d
d}|dur| j||
jd}nd}W n ty   d}Y nw d\}}zt	j
|
j|
jd\}}}}W n t	jjy } ztdt| W Y d}~nd}~ww |du rt	j||d}|du rg }|di }|dd}t|ddddd|||dd
}|du r&tdi ||gd|}n7||jvr2|j| |dddurU|d durU|jdu sP|d |jkrU|d |_|dddurx|d durx|jdu ss|d |jkrx|d |_|dddur|jdu s|d |jkr|d |_|dddur|jdu s|d |jkr|d |_|dddur|d du rd|_|dddur|d du rd|_|dddur|d du rd|_|dddur|d du rd|_|dddur|d du rd|_|dddur"|d du r"d|_ |dddur7|d dur7|d |_!|dddurJ|du rJ|d}|d	ddur]|du r]|d	}|durm|du rid}||7 }|dur}|du ryd}||7 }q|dur|dur||_"|dur||_#|dur||_|S ) z
        For a given model group name, return the combined model info

        Returns:
        - ModelGroupInfo if able to construct a model group
        - None if error constructing model group info
        Nr  Fr  Tr   ry  r  r  r{  )re  r  )r  r  r  z.litellm.router.py::get_model_group_info() - {}moder   r   )
rv  Z
max_tokensmax_input_tokensmax_output_tokensinput_cost_per_tokenoutput_cost_per_tokenZlitellm_providerr  supported_openai_paramsZsupports_system_messages)rF  	providersr  r  r  r  "supports_parallel_function_callingsupports_visionsupports_function_callingsupports_web_searchsupports_url_contextsupports_reasoningr  r   )$r  r   r	  rZ   #configurable_clientside_auth_paramsr   r  r   r  r   rs   r*  
exceptionsBadRequestErrorr   r  r
  r   get_supported_openai_paramsModelMapInfor\   r  r   r  r  r  r  r   r!  r"  r#  r$  r%  r  r  r  )r  rF  r  model_group_infoZ	total_tpmZ	total_rpmr&  r   r   Zis_matchr   Zmodel_litellm_paramsZmodel_info_dictZ_deployment_tpmZ_deployment_rpmre  ry  r  r  r  r  r  Zdb_model_infor  r   r   r   _set_model_group_info  sJ  

















zRouter._set_model_group_infoc                 C   sh   || j v r-| j | }t|tr|}nt|tr$|d du rdS |d }ndS | j||dS | j||dS )z
        For a given model group name, return the combined model info

        Returns:
        - ModelGroupInfo if able to construct a model group
        - None if error constructing model group info or hidden model group
        hiddenTNr   )rF  r  )r   r   r   r  r,  )r  rF  rS  Z_router_model_groupr   r   r   get_model_group_info  s    
	



zRouter.get_model_group_infoc                    sZ  t  }|d}g }g }| j|d}|du rdS |D ]5}|di d}|d d}	|du s5|	du r6q|tjjj||	|d	 |tj	jj||	|d	 q|| }
| j
j|
d
I dH }|du redS |dt| }|t|d }d}|dur|D ]}t|tr|du rd}||7 }q}d}|dur|D ]}t|tr|du rd}||7 }q||fS )z
        Returns current tpm/rpm usage for model group

        Parameters:
        - model_group: str - the received model name from the user (can be a wildcard route).

        Returns:
        - usage: Tuple[tpm, rpm]
        r  r  Nrb  ry  r{  r   r   )r{  r   r  )r  r   )rv   r  r  r   r   r_   r  r$  r
  r  r   Zasync_batch_get_cacher  r   r   )r  rF  r  r  Ztpm_keysZrpm_keysr   r   r{  r  Zcombined_tpm_rpm_keysZcombined_tpm_rpm_valuesZtpm_usage_listZrpm_usage_listZ	tpm_usager  Z	rpm_usager   r   r   get_model_group_usage  sp   

zRouter.get_model_group_usage)maxsizec                 C   s
   |  |S )z
        Cached version of get_model_group_info, uses @lru_cache wrapper

        This is a speed optimization, since set_response_headers makes a call to get_model_group_info on every request
        )r.  )r  rF  r   r   r   _cached_get_model_group_info  s   
	z#Router._cached_get_model_group_infoc                    s   |  |}|d ur|jd ur|j}nd }|d ur"|jd ur"|j}nd }|d u r.|d u r.i S | |I d H \}}i }|d urJ||pBd |d< ||d< |d urZ||pRd |d< ||d< |S )Nr   x-ratelimit-remaining-tokenszx-ratelimit-limit-tokensx-ratelimit-remaining-requestszx-ratelimit-limit-requests)r1  r  r  r/  )r  rF  r+  Z	tpm_limitZ	rpm_limitZcurrent_tpmZcurrent_rpmZreturned_dictr   r   r   get_remaining_model_group_usage  s.   
z&Router.get_remaining_model_group_usagec                    s   t |trIt|drIt |jtrI|jdi  ||jd d< |jd }d|vrId|vrI|durI| |I dH }| D ]\}}|durH|||< q<|S )a  
        Add the most accurate rate limit headers for a given model response.

        ## TODO: add model group rate limit headers
        # - if healthy_deployments > 1, return model group rate limit headers
        # - else return the model's rate limit headers
        r,  r  zx-litellm-model-groupr2  r3  N)r   r   r"  r,  r  r   r4  r  )r  r  rF  r  Zremaining_usageheaderr$  r   r   r   r  1  s0   


zRouter.set_response_headersc                 C   sR   | j   t|D ]\}}|d}|r&|| j vrg | j |< | j | | q	dS )z
        Build model_name -> deployment indices mapping for O(1) lookups.

        This index allows us to find all deployments for a given model_name in O(1) time
        instead of O(n) linear scan through the entire model_list.
        r  N)r   clearr  r   r   )r  r   r  r   r  r   r   r   _build_model_name_indexU  s   



zRouter._build_model_name_indexc                 C   s   g | _ |   |   t|D ]:\}}|di }|d}|du rB|dd}|di }| ||}d|vr<i |d< ||d d< | j||d qdS )z
        Build model index from model list to enable O(1) lookups immediately.
        This is called during initialization to avoid the race condition where
        requests arrive before model_id_to_deployment_index_map is populated.
        ry  r{  Nr  r  r   r  )r   r  r  r  r   r~  r  )r  r   r  r   ry  re  r  r   r   r   r   '_build_model_id_to_deployment_index_mape  s   
z.Router._build_model_id_to_deployment_index_mapexclude_team_modelsc                 C   s   g }|dur9|| j v r7| j | }|D ]$}| j| }d|v r6d|d v r6|r-|d dr-q||d d  q|S | j D ]%}| j| }| j| }d|v rcd|d v rc|r^|d dr^q>|| q>|S )z
        if 'model_name' is none, returns all.

        Returns list of model id's.

        Optimized with O(1) or O(k) index lookup when model_name provided,
        instead of O(n) linear scan.
        Nry  r{  r  )r   r   r   r   r   r  )r  r  r9  idsr  r  r   re  r   r   r   get_model_ids  s*   





zRouter.get_model_idscandidate_idc                 C   s
   || j v S )a  
        O(1) membership check for a deployment ID without allocating large lists.

        Note: Call sites may pass a variable named `model` when it actually
        contains a deployment ID. This helper expects the deployment ID string.

        Uses the existing `model_id_to_deployment_index_map` which is kept
        in sync by `_build_model_id_to_deployment_index_map` and model-list
        mutation helpers.
        )r   )r  r<  r   r   r   r    s   
zRouter.has_model_idc                 C   s   |sdS || j v s| |r|S | jdd}|sdS |D ]2}|di }|d}||kp@|o6|d| p@|o@|d| }|rN|d}|rN|  S qdS )a  
        Resolve model_name from model_id.

        This method attempts to find the correct model_name to use with the router
        so that litellm_params can be automatically injected from the model config.

        Strategy:
        1. First, check if model_id directly matches a model_name or deployment ID
        2. If not, search through router's model_list to find a match by litellm_params.model
        3. Return the model_name if found, None otherwise

        Args:
            model_id: The model_id extracted from decoded video_id
                     (could be model_name or litellm_params.model value)

        Returns:
            model_name if found, None otherwise. If None, the request will fall through
            to normal flow using environment variables.
        Nr  r   r   r  :r  )r   r  r  r   endswith)r  re  Z
all_modelsr  r   Zactual_modelmatchesr  r   r   r    resolve_model_name_from_model_id  s*   

z'Router.resolve_model_name_from_model_idteam_model_namer  c                 C   sH   | j ||d}|sdS |D ]}|di d|kr!|d  S qdS )z
        Map a team model name to a team-specific model name.

        Returns:
        - deployment id: str - the deployment id of the team-specific model
        - None: if no team-specific model name is found
        r  r  Nry  r  r  )r  r   )r  rA  r  r  r   r   r   r   map_team_model  s   zRouter.map_team_modelc                 C   sL   |dur|d  d|kr||d  dkrdS |dur$|d |kr$dS dS )zU
        Get the team-specific model name if team_id matches the deployment.
        Nry  r  r  Tr  Fr  )r  r  r   r  r   r   r   should_include_deployment  s   z Router.should_include_deploymentmodel_aliasc           	      C   s   g }|| j v r7| j | }|D ]&}| j| }| j|||dr4|dur/| }||d< || q|| q|S |durdt| jD ]#\}}| j|||drc|dur^| }||d< || q@|| q@|S )z
        Return all deployments of a model name

        Used for accurate 'get_model_list'.

        if team_id specified, only return team-specific models

        Optimized with O(1) index lookup instead of O(n) linear scan.
        )r  r   r  Nr  )r   r   rD  r  r   r  )	r  r  rE  r  returned_modelsr  r  r   Zalias_modelr   r   r   _get_all_deployments	  s6   




zRouter._get_all_deploymentsc                 C   sb   |   pg }g }|D ]$}|d}| |r%| j||d}|r$|| q
||dd q
|S )u  
        Returns all possible model names for the router, including models defined via model_group_alias.

        If a team_id is provided, only deployments configured with that team_id (i.e. team‐specific models)
        will yield their team public name.
        ry  )r  r  r  r  )r  r   _is_team_specific_model_get_team_specific_modelr   )r  r  Zdeploymentsr   r  ry  rA  r   r   r   r  >  s   


zRouter.get_model_namesc                 C   s6   | dpi }|du rdS || dkr| dS dS )a  
        Get the team-specific model name if team_id matches the deployment.

        Args:
            deployment: DeploymentTypedDict - The model deployment
            team_id: Optional[str] - If passed, will return router models set with a `team_id` matching the passed `team_id`.

        Returns:
            str: The `team_public_model_name` if team_id matches
            None: If team_id doesn't match or no team info exists
        ry  Nr  r  r  )r  r  r  ry  r   r   r   rI  U  s   
zRouter._get_team_specific_modelry  c                 C   s   t |o|dS )z
        Check if model info contains team-specific configuration.

        Args:
            model_info: Model information dictionary

        Returns:
            bool: True if model has team-specific configuration
        r  )r  r   )r  ry  r   r   r   rH  j  s   
zRouter._is_team_specific_modelc                 C   s   g }|dur|| j vr|S || j | fg}nt| j  }|D ]/\}}t|tr+|}nt|trCtdi |}|d du r>q|d }nq|| j||d q|S )z
        Helper function to get model list from model alias.

        Used by `.get_model_list` to get model list from model alias.
        Nr-  Tr   )r  rE  r   )	r   r   r  r   r   r  ra   rP  rG  )r  r  rF  Zalias_itemsrE  Zmodel_valueZ_router_model_nameZ_model_valuer   r   r   get_model_list_from_model_aliasv  s*   



z&Router.get_model_list_from_model_aliasc                 C   s   g }|dur| | j||d | | j|d t|dkr\| j|p&g }|dur?|| jv r?| j| |p9g }| | |dur\|dur\|D ]}tdi |}||d< || qI|du re|| j	7 }|S )z
        Includes router model_group_alias'es as well

        if team_id specified, returns matching team-specific models
        NrB  r  r   r  r   )
rP  rG  rJ  r  r   r	  r   rX   r   r   )r  r  r  rF  r
  Z#potential_team_only_wildcard_modelsr  Zdeployment_typed_dictr   r   r   r    s,   



zRouter.get_model_listc                 C   s   | j   dS )zInvalidate the cached model group info.

        Call this whenever self.model_list is modified to ensure the cache is rebuilt.
        N)r1  cache_clearr1  r   r   r   r    s   z)Router._invalidate_model_group_info_cachec                 C   s
   d| _ dS )zInvalidate the cached access groups.

        Call this whenever self.model_list is modified to ensure the cache is rebuilt.
        N)r   r1  r   r   r   r    s   
z&Router._invalidate_access_groups_cachemodel_access_groupc                 C   s   |du o|du o|du }|r| j dur| j S ddlm} |t}| j||d}|r_|D ]3}|d}	|	r^|	dg p;g D ]!}
|durR|
|krQ|d }||
 | q<|d }||
 | q<q+|rit|| _ | j S |S )a  
        If model_name is provided, only return access groups for that model.

        Parameters:
        - model_name: Optional[str] - the received model name from the user (can be a wildcard route). If set, will only return access groups for that model.
        - model_access_group: Optional[str] - the received model access group from the user. If set, will only return models for that access group.
        - team_id: Optional[str] - the team id, to resolve team-specific models
        Nr   r   rB  ry  r   r  )r   collectionsr   r   r  r   r   r  )r  r  rL  r  Z
_use_cacher   r   r   r  r  groupr   r   r   get_model_access_groups  s.   

zRouter.get_model_access_groupsc                 C   sN   | j |d}t|dkrdS ||g }|D ]}| jj|ddur$ dS qdS )zG
        Return True if model access group is a wildcard route
        )rL  r   Fr  NT)rO  r  r   r   r	  )r  rL  r   r  r   r   r   r   )_is_model_access_group_for_wildcard_route   s   z0Router._is_model_access_group_for_wildcard_routec                 C   sX   t | }i }g d}|D ]}||v r|| ||< |dkr)| jdkr)| jj ||< q|S )a  
        Get router settings method, returns a dictionary of the settings and their values.
        For example get the set values for routing_strategy_args, routing_strategy, allowed_fails, cooldown_time, num_retries, timeout, max_retries, retry_after
        )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )varsr   r-  r)  r!  )r  Z	_all_varsZ_settings_to_returnZvars_to_includevarr   r   r   get_settings   s   
zRouter.get_settingsc                 K   s   g d}g d}|   }|D ]A}||v rG||v r%t|| }t| || q|dkr>|d || kr>| j|| |di d t| |||  qtd| qtd|     dS )	z-
        Update the router settings.
        )r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   zSetting {} is not allowedzUpdated Router settings: N)rS  r   r  r   r   r   r  r
  )r  r  Z_allowed_settingsZ_int_settingsZ_existing_router_settingsrR  Z_casted_valuer   r   r   update_settings:   s&   zRouter.update_settingsc                 C   s  |d d }t |}|dkr2d|}| jj|d|d}|du r0tj| |d | jj|d|d}|S |d	kr]|d
du rM| d}| jj|d|d}|S | d}| jj|d|d}|S |d
du rs| d}| jj||d}|S | d}| jj||d}|S )a  
        Returns the appropriate client based on the given deployment, kwargs, and client_type.

        Parameters:
            deployment (dict): The deployment dictionary containing the clients.
            kwargs (dict): The keyword arguments passed to the function.
            client_type (str): The type of client to return.

        Returns:
            The appropriate client based on the given client_type and kwargs.
        ry  r{  re  z{}_max_parallel_requests_clientTrv  r  r  N)r  r   r  r   Z_stream_async_clientZ_async_clientZ_stream_client)rv  r  Z_client)r   r
  r   r  r4   Z set_max_parallel_requests_clientr   )r  r  r  rg  re  r  	cache_keyr  r   r   r   r  m   sL   




zRouter._get_clientc           !         s  t d|  t|}t  ztj|d}W n ty5 } zt dt	| |W  Y d}~S d}~ww d}d}	d}
t
|}t }|d}| d| }| jj|d	|d
pYi }t|D ]J\}}|di }|di }zO|dd}|du r|dd}| j||d}|p|dd}t|tr|dddurt|d tr||d kr | d	}|	d||d |7 }	W q^W n ty } zt dt	| W Y d}~nd}~ww |dd}| jj|d	|d
pd}t|tr-| jdkr-||d||< t||| }t|tr-|dddur-t|d tr-|d |kr- | d	}
q^|durV|ddurV|d}|durVttd#i ||dsV | q^|durtjdu rtj|td#i |d\}}}}tj||d}|du r~q^tjj|d}dg}|  D ]\}} ||vr||v rt d|   | qq^t! t!|kr	 |
d	u rt"|d|d	u rtj#d|	|dd t! dkr fd!d"t|D }t!|dkrtj$|}|S )$a  
        Filter out model in model group, if:

        - model context window < message length. For azure openai models, requires 'base_model' is set. - https://docs.litellm.ai/docs/proxy/cost_tracking#spend-tracking-for-azure-openai-models
        - filter models above rpm limits
        - if region given, filter out models not in that region / unknown region
        - [TODO] function call and model doesn't support function calling
        z2Starting Pre-call checks for deployments in model=)r  zllitellm.router.py::_pre_call_checks: failed to count tokens. Returning initial list of deployments. Got - {}NFr  r  z:rpm:TrU  r   ry  r  r  r   r  z%Model={}, Max Input Tokens={}, Got={}zAn error occurs - {}r{  r   r   r  allowed_model_region)r   rW  r  r  )Zpassed_paramsZresponse_formatz1INVALID MODEL INDEX @ REQUEST KWARG FILTERING, k=rp  z~litellm._pre_call_checks: Context Window exceeded for given call. No models have context window large enough for this call.
{}r  c                    s   g | ]
\}}| vr|qS r   r   )r%  idZinvalid_model_indicesr   r   r'  Y!  s
    z+Router._pre_call_checks.<locals>.<listcomp>r   )%r   r  r   r  r   Ztoken_counterr  r  r
  r   r   rv   r  r   r  r  r   r  r   r  r   r  rs  r   maxrw   rZ   Zdrop_paramsrs   r)  r  Zget_non_default_paramsr  r  rc   rl  Z_get_order_filtered_deployments)!r  r   r   r  r  Z_returned_deploymentsZinput_tokensr  Z_context_window_errorZ_potential_error_strZ_rate_limit_errorr  r  r  r  Zmodel_group_cacher  r  r  r  r  ry  re  Zcurrent_request_cache_localZcurrent_requestrW  r*  r  r  r  Zspecial_paramsr  r  r   rZ  r   _pre_call_checks   s
  


 









zRouter._pre_call_checksc                 C   s6   || j vrdS | j | }t|tr|}|S |d }|S )z
        Get the model from the alias.

        Returns:
        - str, the litellm model name
        - None, if model is not in model group alias
        Nr   )r   r   r   )r  r   Z_itemr   r   r   rr  g!  s   


zRouter._get_model_from_aliasc                    s    fdd| j D S )z6
        Get the deployment by litellm model.
        c                    s    g | ]}|d  d  kr|qS )r   r   r   r  rp  r   r   r'  ~!  s     z;Router._get_deployment_by_litellm_model.<locals>.<listcomp>)r   )r  r   r   rp  r    _get_deployment_by_litellm_modelz!  s   z'Router._get_deployment_by_litellm_modelr  c                 C   sD  d}|dur| dpi }| dpi }| dp| d}|du r*|| j|dfS | |rM| j|d}	|	durE|	jj}
|
|	jddfS td	| d
| j|d}|durY|}|| j	vr| j
j|d}|rk||fS |dur|| jv r| j| j|d}|r||fS | jdur| j }| jd  |d< ||d d< ||fS | j|d}t|dkr| j|d}ttjrtd|  t|dkr|  r|  }|rtd| d| d |}| j|d}t|dkr| j|ddu rd| d|}n	d| d|}tj||ddtjr|tjv rtj| }||fS )aF  
        Common checks for 'get_available_deployment' across sync + async call.

        If 'healthy_deployments' returned is None, this means the user chose a specific deployment

        Returns
        - str, the litellm model name
        - List, if multiple models chosen
        - Dict, if specific model chosen
        Nr   r  Zuser_api_key_team_idTrp  r  r   zBLiteLLM Router: Trying to call specific deployment, but Model ID :z does not exist in Model ID mapr   r   r  r   zinitial list of deployments: zModel 'z7' not found. Attempting to use default fallback model 'z'.zYou passed in model=z+. There is no 'model_name' with this stringz1. There are no healthy deployments for this modelr  r  )r   r]  r  r  r   r   r  r*  rr  r   r   Zget_deployments_by_patternr   r   r  rG  r  r   rr  r   r   r  r  r  r	  r  r
  r   r(  Zmodel_alias_map)r  r   r  r  r  r  Zrequest_team_idr   r  r  Zdeployment_modelZ_model_from_aliasZpattern_deploymentsZupdated_deploymentr   Zfallback_modelr  r   r   r   r  !  s   





z*Router._common_checks_available_deploymentc           
         sT  | j |||||d\}}t||d}ttjr!td|  t||d}ttjr5td|  t|t	r<|S t
| |dI dH }ttjrStd|  | j||d}| j|||durittt |nd||d	I dH }| jr|dur| j|ttt |||d
}t| |||| |dI dH }t|dkrt| ||dI dH }	|	|S )z
        Get the healthy deployments for a model.

        Returns:
        - List[Dict], if multiple models chosen
        *OR*
        - Dict, if specific model chosen
        r   r  r  r  r  )r   r  z'healthy_deployments after team filter: z-healthy_deployments after web search filter: rk  Ncooldown deployments: r   cooldown_deploymentsr  r   r   r  r  )r  r   r  r   ro  r   r  r   r  )r  r7   r   rr  r   r   r  r8   r   r  r;   _filter_cooldown_deploymentsr  r   r   rL   r   r\  r	   r.   r  r  rC   )
r  r   r  r  r  r  r  r   ra  rs  r   r   r   r6  !  s   




z$Router.async_get_healthy_deploymentsc              
      s  | j dkr$| j dkr$| j dkr$| j dkr$| j dkr$| j|||||dS zt|}| j|||||dI dH }|dur@|j}|j}| j||||||d	I dH }t|trV|W S |	d
rft
|dkrf|d W S t }	| j dkr| jdur| jj||||dI dH }
nV| j dkr| jdur| jj||||dI dH }
n>| j dkr| jdur| jj|||||dI dH }
n%| j dkrt| ||dW S | j dkr| jdur| jj||dI dH }
nd}
|
du rt| ||dI dH }|td| d| |
 d|  t }||	 }t| jjtj|d||	|d |
W S  tyK } z+t }|durE|	dd}|durEtj |j!||fd"  t|#|| |d}~ww )z
        Async implementation of 'get_available_deployments'.

        Allows all cache calls to be made async => 10x perf impact (8rps -> 100 rps).
        r   r   r   r   r   r^  r   r  r  r  r  Nr   r  r  r  r  r  Z"_encrypted_content_affinity_pinnedrx   r   rF  r   r  r  rF  r   r  r  r  r  rF  r   rc  $get_available_deployment for model: , Selected deployment:  for model: 2<routing_strategy>.async_get_available_deploymentsr  r  r4  r  r	  r
  r  r  )$r   r  r   async_pre_routing_hookr   r  r6  r   r  r   r  r  r,  async_get_available_deploymentsr.  r-  r-   r   rC   r   r	  r  r  r  r   r  rf   r  r  r  r  r  r  r  r  r  )r  r   r  r  r  r  r  pre_routing_hook_responser   r	  r  rs  r
  r  r  r  r+  r   r   r   rc  ]"  s   










	


	


	





z%Router.async_get_available_deploymentc              
      sh  zt |}| j|||||dI dH }|dur|j}|j}| j||||||dI dH }t|trQ|di }	|	dr>|W S tj	d|di d d	|d
d| j
|d}
t|
dkritj	d| d|d
dt }| jdkr| jdur| jj||
||dI dH }n>| jdkr| jdur| jj||
|||dI dH }n%| jdkrt| |
|dW S | jdkr| jdur| jj||
dI dH }nd}|du rt| ||dI dH }|td| d| |  t }|| }t| jjtj|d|||d |W S  ty3 } z+t  }|dur-|dd}|dur-t!j"|j#||fd$  t|%|| |d}~ww )z
        Async version of get_available_deployment_for_pass_through

        Only returns deployments configured with use_in_pass_through=True
        re  Nrf  r   r  Deployment ry  r{  C does not support pass-through endpoint (use_in_pass_through=False)r  r  r   r   Model z has no deployments configured with use_in_pass_through=True. Please add use_in_pass_through: true to the deployment configurationr   rg  r   rh  r   r  r   ri  rc  z7async_get_available_deployment_for_pass_through model: , selected deployment: rm  rn  r  r  )&r   ro  r   r  r6  r   r  r   r   r(   _filter_pass_through_deploymentsr  r  perf_counterr   r,  rp  r-  r-   r   rC   r   r	  r  r  r  r   r  rf   r  r  r  r  r  r  r  r  r  )r  r   r  r  r  r  r  rq  r   r   pass_through_deploymentsr	  r  rs  r
  r  r  r  r+  r   r   r   /async_get_available_deployment_for_pass_through #  s   







	


	





z6Router.async_get_available_deployment_for_pass_throughc                    sZ   || j v r| j | j|||||dI dH S || jv r+| j| j|||||dI dH S dS )z
        This hook is called before the routing decision is made.

        Used for the litellm auto-router to modify the request before the routing decision is made.
        re  N)r   ro  r   )r  r   r  r  r  r  r   r   r   ro  #  s$   



zRouter.async_pre_routing_hookc                 C   s  | j ||||d\}}t|tr|S t|}t| |d}| j||d}| jr3|dur3| j||||d}t|dkrV| j	|d}	| j
j|	|d}
t| |d}t||
| j|d	| jd
kri| jduri| jj||d}nL| jdkrut| ||dS | jdkr| jdur| jj|||d}n,| jdkr| jdur| jj||||d}n| jdkr| jdur| jj||||d}nd}|du rtd| d | j	|d}	| j
j|	|d}
t| |d}t||
| j|d	td| d| | d|  |S )zB
        Returns the deployment based on routing strategy
        r   r  r  r  rk  r`  Nrb  r   r  	model_idsr  r   r   r   Zcooldown_listr   ri  r   r  r   rF  r   r  r   rg  r   rj  z, No deployment availablerk  rl  )r  r   r  r   r=   rd  r   r\  r  r;  r   get_min_cooldownrb   r   r   get_available_deploymentsr-   r-  r   r,  r   r	  r  )r  r   r  r  r  r  r   r  ra  r}  _cooldown_time_cooldown_listr  r   r   r   r  #  s   









zRouter.get_available_deploymentc                 C   sL  | j ||||d\}}t|tr0|di }|dr|S tjd|di d d|dd	| j|d
}t|dkrHtjd| d|dd	t|}	t	| |	d}
| j
||
d}| jri|duri| j||||d}t|dkr| j|d}| jj||	d}t	| |	d}t||| j|d| jdkr| jdur| jj||d}nL| jdkrt| ||dS | jdkr| jdur| jj|||d}n,| jdkr| jdur| jj||||d}n| jdkr| jdur| jj||||d}nd}|du rtd| d | j|d}| jj||	d}t	| |	d}t||| j|dtd| d | |  |S )!a  
        Returns deployments available for pass-through endpoints (based on load balancing strategy)

        Similar to get_available_deployment, but only returns deployments with use_in_pass_through=True

        Args:
            model: Model name
            messages: Optional list of messages
            input: Optional input data
            specific_deployment: Whether to find a specific deployment
            request_kwargs: Optional request parameters

        Returns:
            Dict: Selected deployment configuration

        Raises:
            BadRequestError: If no deployment is configured with use_in_pass_through=True
            RouterRateLimitError: If no pass-through deployments are available
        r{  r   r  rr  ry  r{  rs  r  r  rt  r   ru  z has no deployment configured with use_in_pass_through=True. Please add use_in_pass_through: true in the deployment configurationrk  r`  Nrb  r  r|  r~  r   ri  r   r  r   r  r   rg  r   z1get_available_deployment_for_pass_through model: z, no available deploymentsrv  )r  r   r  r   r   r(  rw  r  r   r=   rd  r   r\  r;  r   r  rb   r   r   r  r-   r-  r   r,  r   r	  r  )r  r   r  r  r  r  r   r   ry  r  ra  r}  r  r  r  r   r   r   )get_available_deployment_for_pass_through8$  s   












z0Router.get_available_deployment_for_pass_throughra  c                    s6   t tjrt d|  t|  fdd|D S )a  
        Filters out the deployments currently cooling down from the list of healthy deployments

        Args:
            healthy_deployments: List of healthy deployments
            cooldown_deployments: List of model_ids cooling down. cooldown_deployments is a list of model_id's cooling down, cooldown_deployments = ["16700539-b3cd-42f4-b426-6a12a1bb706a", "16700539-b3cd-42f4-b426-7899"]

        Returns:
            List of healthy deployments
        r_  c                    s    g | ]}|d  d  vr|qS )ry  r{  r   r%  r  Zcooldown_setr   r   r'  $  s
    z7Router._filter_cooldown_deployments.<locals>.<listcomp>)r   rr  r   r   r  r  )r  r   ra  r   r  r   rd  $  s   
z#Router._filter_cooldown_deploymentsc                 C   s>   t dt| d dd |D }t dt| d |S )a  
        Filter out deployments configured with use_in_pass_through=True

        Args:
            healthy_deployments: List of healthy deployments

        Returns:
            List[Dict]: Only includes a list of deployments that support pass-through
        z%Filter pass-through deployments from z healthy deploymentsc                 S   s$   g | ]}| d i  ddr|qS )r   r  Fr  r  r   r   r   r'  %  s    z;Router._filter_pass_through_deployments.<locals>.<listcomp>zFound z& deployments with pass-through enabled)r   r  r  )r  r   ry  r   r   r   rw  $  s   z'Router._filter_pass_through_deploymentsc              
   C   s~   z!| di  dd}|du r|dur| || W dS W dS W dS  ty> } ztdt|  W Y d}~dS d}~ww )z7
        Tracks successful requests rpm usage.
        ry  r{  Nz$Error in _track_deployment_metrics: )r   r  r  r   r  r   )r  r  r  r  re  r  r   r   r   rh  %  s   
"z Router._track_deployment_metricsc                 C   s   t ||| j| jdS )Nru  )r{  r   r   )r  rs  rF  r   r   r   rB   %  s   z(Router.get_num_retries_from_retry_policyc                 C   s   | j }|du r	dS t|tjr|jdur|jS t|tjr%|jdur%|jS t|tjr3|jdur3|jS t|tj	rA|j
durA|j
S t|tjrO|jdurQ|jS dS dS )a  
        BadRequestErrorRetries: Optional[int] = None
        AuthenticationErrorRetries: Optional[int] = None
        TimeoutErrorRetries: Optional[int] = None
        RateLimitErrorRetries: Optional[int] = None
        ContentPolicyViolationErrorRetries: Optional[int] = None
        N)r   r   r   r  ZAuthenticationErrorAllowedFailsrl  ZTimeoutErrorAllowedFailsr  ZRateLimitErrorAllowedFailsr  Z'ContentPolicyViolationErrorAllowedFailsr(  ZBadRequestErrorAllowedFails)r  rs  r   r   r   r   get_allowed_fails_from_policy)%  s2   	









z$Router.get_allowed_fails_from_policyc                 C   sb   ddl m} | jd u rd S | j}||jdg|jd}|| _tj| tj	|j
 td d S )Nr   )SlackAlertingZslack)alerting_thresholdZalertingZdefault_webhook_urlz2[94m
Initialized Alerting for litellm.Router[0m
)Z1litellm.integrations.SlackAlerting.slack_alertingr  r   r  Zwebhook_urlZslack_alerting_loggerr   r   r+  r  Z!response_taking_too_long_callbackr   r	  )r  r  Zrouter_alerting_configZ_slack_alerting_loggerr   r   r   r  Q%  s"   
zRouter._initialize_alertingCustomRoutingStrategyc                 C   s    t | d|j t | d|j dS )a  
        Sets get_available_deployment and async_get_available_deployment on an instanced of litellm.Router

        Use this to set your custom routing strategy

        Args:
            CustomRoutingStrategy: litellm.router.CustomRoutingStrategyBase
        r  rc  N)r  r  rc  )r  r  r   r   r   set_custom_routing_strategyi%  s   z"Router.set_custom_routing_strategyc                 C   s   d t _| j  d S r/  )r   r   flush_cacher1  r   r   r   r  %  s   zRouter.flush_cachec                 C   s*   g t _g t _g t _g t _d | _|   d S r/  )r   r  r   r  r  r   r  r1  r   r   r   reset%  s   zRouter.reset)F)r   r/  )FFF)T)rR  rb  )NNNN)NNNNN)NF)r   N)NNN)NNFN)NNF(   r   r   r   r  r   __annotations__r   r   r  r   r   tenacityr   r'   r   r*   r   r   r   r   r   r   r`   rX   r	   r   rT   re   rY   r  r  floatr   ra   r^   rS   r]   rh   rR   r  staticmethodr  r  r  r   r   r   r#  rd   r   r  rJ  rR  rl  rp  rt  r  r  r  r  r  r  r   r  r  rp   rn   r  r  r  r  r   rL   r  rk   rm   r#  rY  r  rb  r  r  r  r  rw  rW   r  r  r  ri  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  rM   r  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r  r   ro   r#  r!  r'  r&  rN   r)  r(  r8  r  ri   r;  r9  r@  rJ  rI  rK  rQ  r0  r\  r]  r^  r_  r   r[  rO  r"   wrapr  rp  rq  rz  ry  r|  r  rm  r  r~  r   r  r  r  r  r}  rz   r  r  r  r  r  r  r  rk  r  r~  r  rZ   r  r  r  r  r  r   r   r  r  r   r  r  r  r  r  r  r  r  r*  r  r  r  rj   r  r\   r,  r.  r/  r   r   r1  r4  r  r7  r8  r;  r  r@  rC  rD  rG  r  rI  rH  rJ  r  r  r  rO  rP  rS  rT  r  r\  rr  r]  r  r6  rc  rz  r|   ro  r  r  rd  rw  rh  rB   r  r  rV   r  r  r  r   r   r   r   r      sV	  
 


 !$%&'*+,-01234
7:=@CDLMNOPSTU
   F
	"



OHH(-$
j



e
 

5
 
 


 0



 
F

"&
\

/
S
B


A

d.H*FV/
)
%J
F



#




Q
G

<

G 
`Rx"P6r  	 @6/ 0&H

3 #L"%	&C
3V/
-(4<+'3M	`8 i
I 
!$$"6$5'
*	.!3; E	~g ( *  $
(r   )r  r  enumr  r  r!  r   r  r  r  rM  r   	functoolsr   typingr   r   r   r   r	   r
   r   r   r   r   r   r   rP  Zhttpxr   r   Zpydanticr   Ztyping_extensionsr   r   Zlitellm.litellm_core_utilsZ2litellm.litellm_core_utils.exception_mapping_utilsr   Zlitellm._loggingr   Zlitellm._uuidr   Zlitellm.caching.cachingr   r   r   r   Zlitellm.constantsr   Z"litellm.integrations.custom_loggerr   Z#litellm.litellm_core_utils.asyncifyr   r  r   r   Z,litellm.litellm_core_utils.coroutine_checkerr    Z.litellm.litellm_core_utils.credential_accessorr!   Z%litellm.litellm_core_utils.dd_tracingr"   Z*litellm.litellm_core_utils.litellm_loggingr#   r  Z0litellm.litellm_core_utils.sensitive_data_maskerr$   Z$litellm.llms.openai_like.json_loaderr%   Z&litellm.router_strategy.budget_limiterr&   Z"litellm.router_strategy.least_busyr'   Z#litellm.router_strategy.lowest_costr(   Z&litellm.router_strategy.lowest_latencyr)   Z&litellm.router_strategy.lowest_tpm_rpmr*   Z)litellm.router_strategy.lowest_tpm_rpm_v2r+   r  r-   Z)litellm.router_strategy.tag_based_routingr.   Z/litellm.router_utils.add_retry_fallback_headersr/   r0   Z litellm.router_utils.batch_utilsr1   r2   r3   Z/litellm.router_utils.client_initalization_utilsr4   Z2litellm.router_utils.clientside_credential_handlerr5   r6   r5  r7   r8   Z#litellm.router_utils.cooldown_cacher9   Z&litellm.router_utils.cooldown_handlersr:   r;   r<   r=   r>   Z,litellm.router_utils.fallback_event_handlersr?   r@   rA   Z*litellm.router_utils.get_retry_from_policyrB   r{  Z!litellm.router_utils.handle_errorrC   rD   Z>litellm.router_utils.pre_call_checks.deployment_affinity_checkrE   Z;litellm.router_utils.pre_call_checks.model_rate_limit_checkrF   ZDlitellm.router_utils.pre_call_checks.prompt_caching_deployment_checkrG   Z>litellm.router_utils.router_callbacks.track_deployment_metricsrH   rI   Zlitellm.schedulerrJ   rK   Zlitellm.types.llms.openairL   rM   rN   rO   Zlitellm.types.routerrP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   Zlitellm.types.servicesrf   Zlitellm.types.utilsrg   rh   ri   rj   r*  rk   rl   rm   r  rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   Z&router_utils.pattern_match_deploymentsry   Zopentelemetry.tracerz   _Spanr  r{   r|   r  r~   Enumr   r   r   r   r   r   <module>   s   	8`0