o
    ưi5                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZmZ d dl	Z	e
eZd dlmZmZ g dZddgZdee fdd	Zdefd
dZdd Zd*dedee fddZdedefddZdd ZdefddZdededeeef fddZ 			d+dedee dee dee fd d!Z!d"ed#edefd$d%Z"						d,dedee d&ee dee d'ee dee dee fd(d)Z#dS )-    N)ListOptional)DEFAULT_HEALTH_CHECK_PROMPTHEALTH_CHECK_TIMEOUT_SECONDS)messagesZapi_keypromptinputZvertex_credentialsZaws_access_key_idZaws_secret_access_keymodelZ
mode_errorreturnc                  C   sR   zddl } | | jj}tjdkrt|d W S t|d W S  ty(   Y dS w )zj
    Get process RSS memory in MB.
    On Linux, ru_maxrss is in KB. On macOS, ru_maxrss is in bytes.
    r   Ndarwini   i   )resourceZ	getrusageZRUSAGE_SELF	ru_maxrsssysplatformfloat	Exception)r   r    r   Q/home/app/Keep/.python/lib/python3.10/site-packages/litellm/proxy/health_check.py_get_process_rss_mb   s   
r   c                  C   s   t  } | d u r	dS | dS )Nunknownz.2f)r   )Zrss_mbr   r   r   _rss_mb_for_log-   s   r   c                  C   s   ddg} dt | dgS )z,
    Get a random message from the LLM.
    zHey how's it going?zWhat's 1 + 1?user)Zrolecontent)randomchoice)r   r   r   r   _get_random_llm_message4   s   r   Tendpoint_datadetailsc                 C   s8   |  dd |durdd |  D S dd |  D S )z7
    Clean the endpoint data for display to users.
    Zlitellm_logging_objNFc                 S   s   i | ]\}}|t vr||qS r   )ILLEGAL_DISPLAY_PARAMS.0kvr   r   r   
<dictcomp>C       z(_clean_endpoint_data.<locals>.<dictcomp>c                 S   s   i | ]\}}|t v r||qS r   )MINIMAL_DISPLAY_PARAMSr   r   r   r   r#   E   r$   )popitems)r   r   r   r   r   _clean_endpoint_data=   s   r(   
model_listc                 C   sZ   t  }g }| D ]#}|dpi }|dpd }|d u rq||vr*|| || q|S )N
model_infoid)setgetaddappend)r)   Zseen_idsZfiltered_deploymentsZ
deploymentZ_model_infoZ_idr   r   r   filter_deployments_by_idI   s   

r0   c                    s4   z
t | |I d H W S  t jy   ddi Y S w )NerrorzTimeout exceeded)asynciowait_forTimeoutError)tasktimeoutr   r   r   run_with_timeout\   s   r7   c                    sZ   | d }|  di }| dd }t||}| dpt}ttj||tdgd|I d H S )Nlitellm_paramsr*   modeZhealth_check_timeoutztest from litellm)r9   r   r   )r-   '_update_litellm_params_for_health_checkr   r7   litellmZahealth_checkr   )r	   r8   r*   r9   r6   r   r   r   _run_model_health_checke   s   

r<   modelsconcurrency_limitc           	         s   dgt |  }i tt|  ddtf fdd}tt|t | D ]}|  q&rmtjt	 tj
dI dH \}}|D ])}|}z| ||< W n tyf } z
|||< W Y d}~nd}~ww |  qAs.|fS )z{
    Run health checks with at most `concurrency_limit` active tasks.
    Preserves result ordering to match `models`.
    Nr   r
   c                     sN   zt  \} }W n
 ty   Y dS w tt|}| |< ttdS )NFT)nextStopIterationr2   create_taskr<   maxlen)idxZ
next_modelr5   Z
model_iterpeak_in_flightZtasks_to_indexr   r   _schedule_next   s   zC_run_health_checks_with_bounded_concurrency.<locals>._schedule_next)return_when)rC   iter	enumerateboolrangeminr2   waitr,   keysFIRST_COMPLETEDr&   resultr   )	r=   r>   resultsrG   _doner5   rD   er   rE   r   +_run_health_checks_with_bounded_concurrencyw   s0   

rV   max_concurrencyinstrumentation_contextc                    sF  |pi }t |dd}|dd}|dd}d}d}t|tr3|dkr3d}t| |I d	H \}	}nd
d | D }
t|
}tj|
ddiI d	H }	|r]t	d|||t| ||t
 t 	 g }g }t|	| D ]8\}}|d }t|trd|vr|ti ||| qft|tr|ti ||| qf|t|| qf||fS )z
    Perform a health check for each model in the list.

    max_concurrency: Optional limit on concurrent health check requests.
    enabledFcycle_idr   sourceZ	unboundedr   ZboundedNc                 S   s   g | ]	}t t|qS r   )r2   rA   r<   )r    r	   r   r   r   
<listcomp>   s    z)_perform_health_check.<locals>.<listcomp>return_exceptionsTzhealth_check_dispatch_summary source=%s cycle_id=%s mode=%s model_count=%d max_concurrency=%s peak_in_flight=%d thread_count=%d rss_mb=%sr8   r1   )rK   r-   
isinstanceintrV   rC   r2   gatherloggerdebug	threadingactive_countr   zipdictr/   r(   )r)   r   rW   rX   instrumentation_enabledrZ   r[   Zdispatch_moderF   rR   taskshealthy_endpointsunhealthy_endpointsZ
is_healthyr	   r8   r   r   r   _perform_health_check   sV   
rk   r*   r8   c           	      C   s  t  |d< | dd}|dur||d< nd| dp |dp dvr&d	|d< | dd}|dur4||d< | d
ddkrD| dd|d< |d drddlm} |d }|dr`|dd }|d}g }|D ]}||jvru|| qid|}||d< |S )a  
    Update the litellm params for health check.

    - gets a short `messages` param for health check
    - updates the `model` param with the `health_check_model` if it exists Doc: https://docs.litellm.ai/docs/proxy/health#wildcard-routes
    - updates the `voice` param with the `health_check_voice` for `audio_speech` mode if it exists Doc: https://docs.litellm.ai/docs/proxy/health#text-to-speech-models
    - for Bedrock models with region routing (bedrock/region/model), strips the litellm routing prefix but preserves the model ID
    r   Zhealth_check_max_tokensNZ
max_tokens*Zhealth_check_modelr	       r9   Zaudio_speechZhealth_check_voicealloyZvoicezbedrock/r   )BedrockModelInfo   /)	r   r-   
startswithZ!litellm.llms.bedrock.common_utilsrp   splitZall_global_regionsr/   join)	r*   r8   Z_health_check_max_tokensZ_health_check_modelrp   r	   partsZfiltered_partspartr   r   r   r:      s6   



	


r:   	cli_modelmodel_idc                    s  |pi }t |dd}|dd}|dd}	| s3|r&|d|idg} n|r/td|	| g g fS t }
t| }d	urMfd
d| D }|rL|} n d	uri fdd| D }|g krg fdd| D }|} t| }t| d} t| }|rtd|	|||||t	 t
 	 zt| |||dI d	H \}}W n ty   |rtd|	||t |
 d t	 t
   w |rtd|	||t|t|t |
 d t	 t
 	 ||fS )av  
    Perform a health check on the system.

    When model_id is provided, only the deployment with that id is checked
    (so models that share the same name but have different ids are checked separately).
    When model (name) is provided, all deployments matching that name are checked.

    Returns:
        (bool): True if the health check passes, False otherwise.
    rY   FrZ   r   r[   r	   )
model_namer8   zAhealth_check_cycle_skipped source=%s cycle_id=%s reason=no_modelsNc                    s(   g | ]}| d p
i  d kr|qS )r*   r+   )r-   r    x)ry   r   r   r\   L  s    "z(perform_health_check.<locals>.<listcomp>c                    s    g | ]}|d  d  kr|qS )r8   r	   r   r{   r	   r   r   r\   R  s    c                    s   g | ]
}|d   kr|qS )rz   r   r{   r}   r   r   r\   V  s    )r)   zhealth_check_cycle_start source=%s cycle_id=%s requested_model_count=%d post_model_filter_count=%d deduped_model_count=%d max_concurrency=%s thread_count=%d rss_mb=%s)rW   rX   zihealth_check_cycle_failed source=%s cycle_id=%s model_count=%d duration_ms=%.2f thread_count=%d rss_mb=%si  zhealth_check_cycle_complete source=%s cycle_id=%s model_count=%d healthy_count=%d unhealthy_count=%d duration_ms=%.2f thread_count=%d rss_mb=%s)rK   r-   ra   rb   time	monotonicrC   r0   rc   rd   r   rk   r   	exception)r)   r	   rx   r   ry   rW   rX   rg   rZ   r[   Zcycle_start_timeZrequested_model_countZ_by_idZ_new_model_listZpost_filter_model_countZdeduped_model_countri   rj   r   )r	   ry   r   perform_health_check!  s   

	r   )T)TNN)NNTNNN)$r2   loggingr   r   rc   r~   typingr   r   r;   	getLogger__name__ra   Zlitellm.constantsr   r   r   r%   r   r   strr   r   rf   rK   r(   r0   r7   r<   listr_   tuplerV   rk   r:   r   r   r   r   r   <module>   s   

	
	

,
@
B