o
    ưiy`                     @   s   d dl Z d dlmZmZ d dlmZmZmZmZmZm	Z	 d dl
Z
d dl
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ erWd d
lmZ e	eef ZneZG dd deZG dd deZdS )    N)datetime	timedelta)TYPE_CHECKINGAnyDictListOptionalUnion)ModelResponsetoken_counterverbose_logger)	DualCache)CustomLogger)safe_divide_seconds)!_get_parent_otel_span_from_kwargs)LiteLLMPydanticObjectBase)Spanc                   @   s2   e Zd ZU dZeed< dZeed< dZeed< dS )RoutingArgsi  ttlr   lowest_latency_buffer
   max_latency_list_sizeN)	__name__
__module____qualname__r   float__annotations__r   r   int r   r   ]/home/app/Keep/.python/lib/python3.10/site-packages/litellm/router_strategy/lowest_latency.pyr      s   
 r   c                   @   s<  e Zd ZU dZeed< dZeed< dZeed< i fde	de
fdd	Zd
d Zdd Zdd Z				ddededeeeeef   deeeef  dee dee fddZ			ddededeeeeef   deeeef  dee f
ddZ			ddededeeeeef   deeeef  dee f
ddZdS )LowestLatencyLoggingHandlerF	test_flagr   logged_successlogged_failurerouter_cacherouting_argsc                 C   s   || _ tdi || _d S )Nr   )r$   r   r%   )selfr$   r%   r   r   r   __init__"   s   z$LowestLatencyLoggingHandler.__init__c              
   C   sb  z	 |  |}|d |d u rW d S |d | dd }|d dp&i dd }|d u s3|d u r6W d S t|tr?t|}	 | d}t d}	t d}
t d}|	 d	|
 d	| }|| }d }|d
d d ur|d
 du r|d|| }|}d }d}t|trt	|dd }|d ur|j
}|j}t|tr| }n|}t||}|d urt|}n|}|d urt|tr| }n|}t||}t|}| jj||dpi }||vri ||< t|| dg | jjk r|| dg | n|| d d | jjd  |g || d< |d urFt|| dg | jjk r1|| dg | n|| d d | jjd  |g || d< ||| vrSi || |< || | dd| || | d< || | ddd || | d< | jj||| jjd | jr|  jd7  _W d S W d S  ty } ztdt| W Y d }~d S d }~ww )Nlitellm_paramsmodel_group
model_infoid_map%Y-%m-%d%H%M-streamTcompletion_start_timer   usagekeyparent_otel_spanlatency   time_to_first_tokentpmrpmr5   valuer   `litellm.proxy.hooks.prompt_injection_detection.py::async_pre_call_hook(): Exception occured - {}) _select_metadata_fieldget
isinstancer   strr   nowstrftimer
   getattrcompletion_tokenstotal_tokensr   total_secondsr   r   r   r$   	get_cachelenr%   r   
setdefaultappendZ	set_cacher   r!   r"   	Exceptionr   	exceptionformat)r&   kwargsresponse_obj
start_timeend_timemetadata_fieldr)   r+   latency_keycurrent_datecurrent_hourcurrent_minuteprecise_minuteresponse_ms!time_to_first_token_response_timefinal_valuer9   rG   _usagerF   response_secondsttft_secondsr6   request_count_dicter   r   r   log_success_event(   s   














z-LowestLatencyLoggingHandler.log_success_eventc              
      s  z|  |}|dd}t|tjr|d |du rW dS |d | dd}|d dp1i dd}|du s>|du rAW dS t|trJt|}	 | d}	| jj|	dI dH p[i }
||
vrdi |
|< t	|
| d	g | j
jk r~|
| d	g d
 n|
| d	 d| j
jd  d
g |
| d	< | jj|	|
| j
jdI dH  W dS W dS  ty } ztdt| W Y d}~dS d}~ww )zR
        Check if Timeout Error, if timeout set deployment latency -> 100
        rN   Nr(   r)   r*   r+   r,   r5   r7   g     @@r8   r<   r>   )r?   r@   rA   litellmTimeoutr   rB   r$   async_get_cacherJ   r%   r   rK   rL   async_set_cacher   rM   r   rN   rO   )r&   rP   rQ   rR   rS   rT   
_exceptionr)   r+   rU   r`   ra   r   r   r   async_log_failure_event   s^   



z3LowestLatencyLoggingHandler.async_log_failure_eventc              
      st  z	 |  |}|d |d u rW d S |d | dd }|d dp'i dd }|d u s4|d u r7W d S t|tr@t|}	 | d}t d}	t d}
t d}|	 d	|
 d	| }|| }d }|d
d d ur|d
 du r|d|| }|}d}d }t|trt	|dd }|d ur|j
}|j}t|tr| }n|}t||}|d urt|}n|}|d urt|tr| }n|}t||}t|}| jj||ddI d H pi }||vri ||< t|| dg | jjk r|| dg | n|| d d | jjd  |g || d< |d urLt|| dg | jjk r7|| dg | n|| d d | jjd  |g || d< ||| vrYi || |< || | dd| || | d< || | ddd || | d< | jj||| jjdI d H  | jr|  jd7  _W d S W d S  ty } ztdt| W Y d }~d S d }~ww )Nr(   r)   r*   r+   r,   r-   r.   r/   r0   r1   Tr2   r   r3   )r5   r6   
local_onlyr7   r8   r9   r:   r;   r<   z\litellm.router_strategy.lowest_latency.py::async_log_success_event(): Exception occured - {}) r?   r@   rA   r   rB   r   rC   rD   r
   rE   rF   rG   r   rH   r   r   r   r$   rf   rJ   r%   r   rK   rL   rg   r   r!   r"   rM   r   rN   rO   )r&   rP   rQ   rR   rS   rT   r)   r+   rU   rV   rW   rX   rY   rZ   r[   r\   rG   r9   r]   rF   r^   r_   r6   r`   ra   r   r   r   async_log_success_event  s   









	



z3LowestLatencyLoggingHandler.async_log_success_eventNr)   healthy_deploymentsmessagesinputrequest_kwargsr`   c           $         sH  i }t dt d}t d}	t d}
| d|	 d|
 }d}|du r-dS |}|D ]}|d d |vrKd	d
g|d
d
di||d d < q1zt||d}W n ty_   d
}Y nw | }tt	|t
|}t|}g }| D ]\}}d}|D ]}||d d kr|}q|du rqx|ddp|di ddp|di ddpt d}|ddp|di ddp|di ddpt d}|d	g }|dg }||i dd
}||i dd
}d}|dur|dddur|d du rt
|d
kr|D ]}t|t r||7 }qn|D ]}t|t r+||7 }q|t
| }|di dd}|durF|||< || |ksT|d |krUqx|||f qxt
|d
krfdS t|dd d} | d
 d | jj   fdd| D }!t|!}"|"d
 }| |}#|dur|#|v r|||# d< |S )z>Common logic for both sync and async get_available_deploymentsinfr-   r.   r/   r0   Nr*   r+   r7   r   )r:   r;   )rm   textr:   r(   r;   r9   g        r1   TZapi_base r8   c                 S   s   | d S )Nr8   r   )xr   r   r   <lambda>  s    zHLowestLatencyLoggingHandler._get_available_deployments.<locals>.<lambda>rc   c                    s    g | ]}|d    kr|qS )r8   r   ).0rs   bufferZlowest_latencyr   r   
<listcomp>  s    zJLowestLatencyLoggingHandler._get_available_deployments.<locals>.<listcomp>_latency_per_deployment)r   r   rC   rD   r   rM   itemsrandomsamplelistrJ   dictr@   rA   rL   sortedr%   r   choicer?   )$r&   r)   rl   rm   rn   ro   r`   ry   rV   rW   rX   rY   Z
deploymentZall_deploymentsdZinput_tokensZ_itemsZ_all_deploymentsZpotential_deploymentsitemZitem_mapZ_deploymentmZ_deployment_tpmZ_deployment_rpmZitem_latencyZitem_ttft_latencyZitem_rpmZitem_tpmtotalZ_call_latencyZ_deployment_api_baseZsorted_deploymentsZvalid_deploymentsZrandom_valid_deploymentrT   r   rv   r   _get_available_deployments  s   




z6LowestLatencyLoggingHandler._get_available_deploymentsc           	         sB   | d}t |}| jj||dI d H pi }| ||||||S )Nr,   r4   )r   r$   rf   r   	r&   r)   rl   rm   rn   ro   rU   r6   r`   r   r   r   async_get_available_deployments,  s$   
	z;LowestLatencyLoggingHandler.async_get_available_deploymentsc           	      C   s:   | d}t |}| jj||dpi }| ||||||S )z>
        Returns a deployment with the lowest latency
        r,   r4   )r   r$   rI   r   r   r   r   r   get_available_deploymentsJ  s"   
z5LowestLatencyLoggingHandler.get_available_deployments)NNNN)NNN)r   r   r   r!   boolr   r"   r   r#   r   r~   r'   rb   ri   rk   rB   r}   r   r   r   r	   r   r   r   r   r   r   r   r       sz   
 
 E 
 
"r    )r{   r   r   typingr   r   r   r   r   r	   rd   r
   r   r   Zlitellm.caching.cachingr   Z"litellm.integrations.custom_loggerr   Z'litellm.litellm_core_utils.core_helpersr   r   Zlitellm.types.utilsr   Zopentelemetry.tracer   _Spanr   r    r   r   r   r   <module>   s     