o
    )i[                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZm	Z	 d dl
Zd dlZd dlmZmZ d dlmZmZ d dlmZ d dlmZ edurId dlmZ ndZeeZe  G d	d
 d
ZG dd dZG dd dZG dd dZ G dd deZ!dee" de"dee" fddZ#de"dee" fddZ$de"dee" fddZ%de&de&de&de'fdd Z(d!ee" de&de&de&fd"d#Z)G d$d% d%eZ*G d&d' d'eZ+G d(d) d)e+Z,dS )*    N)Counter)DictListOptionalTypeUnioncast)SupportsMetricsInfo
VllmConfig)StatLoggerBaseStats)ray)init_logger)metricsc                   @   sR   e Zd ZdZdZdZdZdZej	Z
ejZejZdee defdd	ZdddZdS )Metricsz
    vLLM uses a multiprocessing-based frontend for the OpenAI server.
    This means that we need to run prometheus_client in multiprocessing mode
    See https://prometheus.github.io/client_python/multiprocess/ for more
    details on limitations.
    Zfinished_reasonwaiting_lora_adaptersrunning_lora_adaptersmax_lora
labelnamesvllm_configc                 C   s  |    |jj}|jj| _| jdd|dd| _| jdd|dd| _| jdd| j| j	| j
gd	d| _| jd
d|dd| _| jdd|d| _| jdd|d| _| jdd|d| _| jdd|g dd| _| jdd|g dd| _| jdd|g dd| _g d}| jdd||d| _| jd d!||d| _| jd"d#||d| _| jd$d%||d| _| jd&d'||d| _| jd(d|t|d| _| jd)d|t|d| _| jd*d+|t|d| _| jd,d-|g d.d| _| jd/d0|t|d| _| jd1d2|t j!g d| _"d S )3Nzvllm:num_requests_runningz,Number of requests currently running on GPU.sumnamedocumentationr   multiprocess_modezvllm:num_requests_waitingz+Number of requests waiting to be processed.zvllm:lora_requests_infozRunning stats on lora requests.Zlivemostrecentzvllm:gpu_cache_usage_percz.GPU KV-cache usage. 1 means 100 percent usage.zvllm:num_preemptions_totalz0Cumulative number of preemption from the engine.)r   r   r   zvllm:prompt_tokens_totalz#Number of prefill tokens processed.zvllm:generation_tokens_totalz&Number of generation tokens processed.zvllm:iteration_tokens_totalz.Histogram of number of tokens per engine_step.)             @         i   i   i   i   i    i @  )r   r   r   bucketsz vllm:time_to_first_token_secondsz,Histogram of time to first token in seconds.)gMbP?g{Gzt?{Gz?g{Gz?g{Gz?gQ?g{Gz?皙?g      ?      ?      ?      ?      @      @      @      $@      4@      D@      T@g      d@g      @g      @z"vllm:time_per_output_token_secondsz.Histogram of time per output token in seconds.)r#   g?g?g333333?r$   g333333?g?333333?g?r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   )r/   r%   g?r'   g      ?g       @r(   r)   r+   g      .@r,   g      >@r-   g      I@g      N@g      ^@g      n@g      ~@g      @g      @g      @z vllm:e2e_request_latency_secondsz3Histogram of end to end request latency in seconds.zvllm:request_queue_time_secondsz5Histogram of time spent in WAITING phase for request.z#vllm:request_inference_time_secondsz5Histogram of time spent in RUNNING phase for request.z!vllm:request_prefill_time_secondsz5Histogram of time spent in PREFILL phase for request.z vllm:request_decode_time_secondsz4Histogram of time spent in DECODE phase for request.zvllm:request_prompt_tokenszvllm:request_generation_tokensz&vllm:request_max_num_generation_tokensz;Histogram of maximum number of requested generation tokens.zvllm:request_params_nz%Histogram of the n request parameter.)r         
      zvllm:request_params_max_tokensz.Histogram of the max_tokens request parameter.zvllm:request_success_totalz)Count of successfully processed requests.)#_unregister_vllm_metricsZmodel_configmax_model_lenZobservability_configZshow_hidden_metrics
_gauge_clsgauge_scheduler_runninggauge_scheduler_waitinglabelname_running_lora_adapterslabelname_max_loralabelname_waiting_lora_adaptersgauge_lora_infogauge_gpu_cache_usage_counter_clscounter_num_preemptioncounter_prompt_tokenscounter_generation_tokens_histogram_clshistogram_iteration_tokenshistogram_time_to_first_tokenhistogram_time_per_output_tokenhistogram_e2e_time_requesthistogram_queue_time_request histogram_inference_time_requesthistogram_prefill_time_requesthistogram_decode_time_requestbuild_1_2_5_buckets#histogram_num_prompt_tokens_request'histogram_num_generation_tokens_request+histogram_max_num_generation_tokens_requesthistogram_n_requesthistogram_max_tokens_requestr   labelname_finish_reasoncounter_request_success)selfr   r   r5   Zrequest_latency_buckets rT   _/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/engine/metrics.py__init__.   s  	
zMetrics.__init__returnNc                 C   s6   t tjjD ]}t|drd|jv rtj| qd S )N_nameZvllm)listprometheus_clientZREGISTRYZ_collector_to_nameshasattrrX   
unregister)rS   	collectorrT   rT   rU   r4      s
   z Metrics._unregister_vllm_metricsrW   N)__name__
__module____qualname____doc__rQ   r;   r9   r:   rZ   Gauger6   r   r>   	HistogramrB   r   strr
   rV   r4   rT   rT   rT   rU   r      s     r   c                	   @   s`   e Zd ZdZ			ddededeee  defdd	Zd
d Zde	e
ef fddZdd ZdS )_RayGaugeWrapperzVWraps around ray.util.metrics.Gauge to provide same API as
    prometheus_client.Gauge Nr   r   r   r   c                 C   s(   ~|rt |nd }tj|||d| _d S N)r   descriptiontag_keys)tupleray_metricsrc   _gauge)rS   r   r   r   r   labelnames_tuplerT   rT   rU   rV      s   z_RayGaugeWrapper.__init__c                 K      | j | | S N)rm   set_default_tagsrS   labelsrT   rT   rU   rs         z_RayGaugeWrapper.labelsvaluec                 C      | j |S rp   )rm   setrS   ru   rT   rT   rU   rw         z_RayGaugeWrapper.setc                 C   s   | j t S rp   )rm   rw   timerS   rT   rT   rU   set_to_current_time   s   z$_RayGaugeWrapper.set_to_current_time)rg   Nrg   )r_   r`   ra   rb   re   r   r   rV   rs   r   intfloatrw   r|   rT   rT   rT   rU   rf      s"    

rf   c                	   @   sT   e Zd ZdZ		ddededeee  fddZd	d
 Zdde	e
ef fddZdS )_RayCounterWrapperzZWraps around ray.util.metrics.Counter to provide same API as
    prometheus_client.Counterrg   Nr   r   r   c                 C   s&   |rt |nd }tj|||d| _d S rh   )rk   rl   r   _counter)rS   r   r   r   rn   rT   rT   rU   rV      s
   z_RayCounterWrapper.__init__c                 K   ro   rp   )r   rq   rr   rT   rT   rU   rs      rt   z_RayCounterWrapper.labelsr'   ru   c                 C   s   |dkrd S | j |S )Nr   )r   incrx   rT   rT   rU   r      s   z_RayCounterWrapper.inc)rg   N)r'   )r_   r`   ra   rb   re   r   r   rV   rs   r   r}   r~   r   rT   rT   rT   rU   r      s    

	r   c                   @   s`   e Zd ZdZ			ddededeee  deee  fdd	Zd
d Z	de
eef fddZdS )_RayHistogramWrapperz^Wraps around ray.util.metrics.Histogram to provide same API as
    prometheus_client.Histogramrg   Nr   r   r   r"   c                 C   s4   |rt |nd }|r|ng }tj||||d| _d S )N)r   ri   rj   
boundaries)rk   rl   rd   
_histogram)rS   r   r   r   r"   rn   r   rT   rT   rU   rV     s   z_RayHistogramWrapper.__init__c                 K   ro   rp   )r   rq   rr   rT   rT   rU   rs     rt   z_RayHistogramWrapper.labelsru   c                 C   rv   rp   )r   observerx   rT   rT   rU   r     ry   z_RayHistogramWrapper.observe)rg   NN)r_   r`   ra   rb   re   r   r   r~   rV   rs   r   r}   r   rT   rT   rT   rU   r     s     


r   c                       s   e Zd ZU dZeeej eZ	eej e
d< eeej eZeej e
d< eeej eZeej e
d< dee def fddZdddZ  ZS )
RayMetricsz
    RayMetrics is used by RayPrometheusStatLogger to log to Ray metrics.
    Provides the same metrics as Metrics but uses Ray's util.metrics library.
    r6   r>   rB   r   r   c                    s"   t d u rtdt || d S )Nz(RayMetrics requires Ray to be installed.)rl   ImportErrorsuperrV   )rS   r   r   	__class__rT   rU   rV   %  s   zRayMetrics.__init__rW   Nc                 C      d S rp   rT   r{   rT   rT   rU   r4   *  s   z#RayMetrics._unregister_vllm_metricsr^   )r_   r`   ra   rb   r   r   rZ   rc   rf   r6   __annotations__r   r   r>   rd   r   rB   r   re   r
   rV   r4   __classcell__rT   rT   r   rU   r     s   
 


r   mantissa_lst	max_valuerW   c                 C   sD   d}g }	 | D ]}|d|  }||kr| | q|  S |d7 }q)z
    Builds a list of buckets with increasing powers of 10 multiplied by
    mantissa values until the value exceeds the specified maximum.

    r   Tr2   r   )append)r   r   exponentr"   mru   rT   rT   rU   build_buckets/  s   r   c                 C      t g d| S )zR
    Example:
    >>> build_1_2_5_buckets(100)
    [1, 2, 5, 10, 20, 50, 100]
    )r   r0   r1   r   r   rT   rT   rU   rK   A     rK   c                 C   r   )zd
    Example:
    >>> build_1_2_3_5_8_buckets(100)
    [1, 2, 3, 5, 8, 10, 20, 30, 50, 80, 100]
    )r   r0      r1   r   r   r   rT   rT   rU   build_1_2_3_5_8_bucketsJ  r   r   nowlast_loglocal_intervalc                 C   s   | | }||kS rp   rT   )r   r   r   Zelapsed_timerT   rT   rU   local_interval_elapsedS  s   r   tracked_statsc                 C   s   t t| ||  S rp   )r~   npr   )r   r   r   rT   rT   rU   get_throughputY  s   r   c                       s`   e Zd ZdZdededdf fddZdeddfd	d
ZdddZ	de
deddfddZ  ZS )LoggingStatLoggerz8LoggingStatLogger is used in LLMEngine to log to Stdout.r   r   rW   Nc                    s   t  || d | _d | _d S rp   )r   rV   last_prompt_throughputlast_generation_throughput)rS   r   r   r   rT   rU   rV   a  s   
zLoggingStatLogger.__init__statsc              
   C   s   | j |j | j|j t|j| j| jrnt	| j |j| jd}t	| j|j| jd}t
j}t||| j| jfs;t
j}|d|||j|j|j|jd |jd  |jdksY|jdkre|d|jd |jd  | ||| dS dS )zQCalled by LLMEngine.
           Logs to Stdout every self.local_interval seconds.)r   r   zAvg prompt throughput: %.1f tokens/s, Avg generation throughput: %.1f tokens/s, Running: %d reqs, Swapped: %d reqs, Pending: %d reqs, GPU KV cache usage: %.1f%%, CPU KV cache usage: %.1f%%.d   r   z/Prefix cache hit rate: GPU: %.2f%%, CPU: %.2f%%N)num_prompt_tokensr   num_prompt_tokens_iternum_generation_tokensnum_generation_tokens_iterr   r   last_local_logr   r   loggerinfoanyr   r   debugnum_running_sysZnum_swapped_sysnum_waiting_sysgpu_cache_usage_sysZcpu_cache_usage_sysZcpu_prefix_cache_hit_rateZgpu_prefix_cache_hit_rate_reset)rS   r   prompt_throughputgeneration_throughputZlog_fnrT   rT   rU   logf  sN   


zLoggingStatLogger.logc                 C   s$   g | _ g | _|j| _|| _|| _d S rp   )r   r   r   r   r   r   )rS   r   r   r   rT   rT   rU   r     s
   
zLoggingStatLogger._resettypeobjc                 C   s   t rp   )NotImplementedErrorrS   r   r   rT   rT   rU   r        zLoggingStatLogger.infor^   )r_   r`   ra   rb   r~   r
   rV   r   r   r   re   r	   r   r   rT   rT   r   rU   r   ^  s    
4r   c                       s   e Zd ZdZeZejZde	de
eef deddf fddZd	eee	f ddfd
dZd	eee	f ddfddZd	ededdfddZd	eee ee	 f ddfddZd	e
eef ddfddZdeddfddZdefddZdededdfddZ  ZS )PrometheusStatLoggerz;PrometheusStatLogger is used LLMEngine to log to Promethus.r   rs   r   rW   Nc                    s0   t  || || _| jt| |d| _d S )N)r   r   )r   rV   rs   _metrics_clsrY   keysr   )rS   r   rs   r   r   rT   rU   rV     s
   zPrometheusStatLogger.__init__datac                 C   s   |j di | j | d S NrT   )rs   rw   rS   Zgauger   rT   rT   rU   
_log_gauge  s   zPrometheusStatLogger._log_gaugec                 C   s6   |dk rt d|| d S |jdi | j| d S )Nr   z'Skipping negative increment of %g to %srT   )r   warningrs   r   )rS   counterr   rT   rT   rU   _log_counter  s   z!PrometheusStatLogger._log_counter	label_keyc                 C   s:   |  D ]\}}|jdi i | j||i| qd S r   )itemsrs   r   )rS   r   r   r   labelcountrT   rT   rU   _log_counter_labels  s   &z(PrometheusStatLogger._log_counter_labelsc                 C   s&   |D ]}|j di | j | qd S r   )rs   r   )rS   Z	histogramr   ZdatumrT   rT   rU   _log_histogram  s   z#PrometheusStatLogger._log_histogramc                 C   s   |j di |  d S r   )rs   r|   r   rT   rT   rU   _log_gauge_string  s   z&PrometheusStatLogger._log_gauge_stringr   c                 C   s  |  | jj|j |  | jj|j |  | jj|j | jjd	|j
| jjd	|j| jj|ji}| | jj| | | jj|j | | jj|j | | jj|j | | jj|jg | | jj|j | | jj|j | | jj|j  | | jj!|j" | | jj#|j$ | | jj%|j& | | jj'|j( t)|j*}| +| jj,|t-j. | | jj/|j0 | | jj1|j2 | | jj3|j4 | | jj5|j6 | | jj7|j8 d S )N,)9r   r   r7   r   r8   r   r=   r   r9   joinr   r;   r   r:   r   r   r<   r   r?   Znum_preemption_iterr@   r   rA   r   r   rC   Znum_tokens_iterrD   Ztime_to_first_tokens_iterrE   Ztime_per_output_tokens_iterrF   Ztime_e2e_requestsrG   Ztime_queue_requestsrH   Ztime_inference_requestsrI   Ztime_prefill_requestsrJ   Ztime_decode_requestsCollectionsCounterZfinished_reason_requestsr   rR   r   rQ   rL   Znum_prompt_tokens_requestsrM   Znum_generation_tokens_requestsrO   Z
n_requestsrN   Z"max_num_generation_tokens_requestsrP   Zmax_tokens_requests)rS   r   Z	lora_infoZfinished_reason_counterrT   rT   rU   _log_prometheus  s   


















z$PrometheusStatLogger._log_prometheusc                 C   sT   |  | | j|j | j|j t|j| j| j	r(g | _g | _|j| _dS dS )z5Logs to prometheus and tracked stats every iteration.N)
r   r   r   r   r   r   r   r   r   r   )rS   r   rT   rT   rU   r     s   

zPrometheusStatLogger.logr   r   c                 C   sD   |dkr |  }| jdd| dd}|jdi |d d S d S )NZcache_configzvllm:cache_config_infoz(Information of the LLMEngine CacheConfigZ
mostrecentr   r   rT   )metrics_infor6   r   rs   rw   )rS   r   r   r   Z
info_gaugerT   rT   rU   r      s   zPrometheusStatLogger.info)r_   r`   ra   rb   r   r   rZ   rc   r6   r~   r   re   r
   rV   r   r}   r   r   r   r   r   r   r   r   r   r   r	   r   r   rT   rT   r   rU   r     s2    	


?r   c                   @   s*   e Zd ZdZeZdededdfddZdS )RayPrometheusStatLoggerz1RayPrometheusStatLogger uses Ray metrics instead.r   r   rW   Nc                 C   r   rp   rT   r   rT   rT   rU   r   2  r   zRayPrometheusStatLogger.info)	r_   r`   ra   rb   r   r   re   r	   r   rT   rT   rT   rU   r   .  s    r   )-rz   typingr   r   r   r   r   r   r   r   numpyr   rZ   Zvllm.configr	   r
   Zvllm.engine.metrics_typesr   r   Zvllm.executor.ray_utilsr   Zvllm.loggerr   Zray.utilr   rl   r_   r   Zdisable_created_metricsr   rf   r   r   r   r}   r   rK   r   r~   boolr   r   r   r   r   rT   rT   rT   rU   <module>   sJ     2	
	

H 	