o
    )iO                     @   s~   d dl mZmZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZ eeZeG dd dZG dd	 d	ZG d
d dZdS )    )	dataclassfield)OptionalN)SpeculativeConfig)init_loggerc                   @   s|   e Zd ZU dZeed< dZeed< dZeed< dZeed< e	e
dZe
e ed< eded	d fd
dZdedefddZdS )SpecDecodingStatszPer-step iteration decoding stats from scheduler.

    Each scheduler step, statistics on spec decoding performance are
    aggregated across requests by the scheduler and returned to the
    frontend in EngineCoreOutputs->SchedulerStats.
    num_spec_tokensr   
num_draftsnum_draft_tokensnum_accepted_tokens)default_factorynum_accepted_tokens_per_posreturnc                 C   s   | |dg| dS )Nr   )r   r    )clsr   r   r   g/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/v1/spec_decode/metrics.pynew   s   zSpecDecodingStats.newc                 C   s\   |  j d7  _ |  j|7  _|  j|7  _|| jksJ t|D ]}| j|  d7  < q d S )N   )r	   r
   r   r   ranger   )selfr
   r   ir   r   r   observe_draft$   s   zSpecDecodingStats.observe_draftN)__name__
__module____qualname____doc__int__annotations__r	   r
   r   r   listr   classmethodr   r   r   r   r   r   r      s   
 r   c                   @   s<   e Zd ZdZdd Zdd ZdefddZej	fd	d
Z
dS )SpecDecodingLoggingzAggregate and log spec decoding metrics.

    LoggingStatLogger aggregates per-iteration metrics over a set
    time interval using observe() and then logs them using log()
    before resetting to zero.
    c                 C   s   |    d S N)resetr   r   r   r   __init__5   s   zSpecDecodingLogging.__init__c                 C   s   g | _ g | _g | _g | _d S r!   )r	   r
   r   accepted_tokens_per_pos_listsr#   r   r   r   r"   8   s   
zSpecDecodingLogging.resetspec_decoding_statsc                 C   s<   | j |j  | j|j | j|j | j|j d S r!   )r	   appendr
   r   r%   r   )r   r&   r   r   r   observe>   s   zSpecDecodingLogging.observec           
      C   s   | j sd S t| j }t| j}t| j}|dkr!|| d ntd}d||  }t| j}tj|dd| }ddd |D }	|d	|||||	 | 	  d S )
Nr   d   nanr   )Zaxisz, c                 s   s    | ]}|d V  qdS )z.3fNr   ).0pr   r   r   	<genexpr>U   s    z*SpecDecodingLogging.log.<locals>.<genexpr>zSpecDecoding metrics: Draft acceptance rate: %.1f%%, Mean acceptance length: %.2f, Accepted: %d tokens, Drafted: %d tokens, Per-position acceptance rate: %s)
r	   npsumr
   r   floatarrayr%   joinr"   )
r   Zlog_fnr	   r
   r   Zdraft_acceptance_rateZmean_acceptance_lengthZ
pos_matrixZacceptance_ratesZ	rates_strr   r   r   logF   s0   zSpecDecodingLogging.logN)r   r   r   r   r$   r"   r   r(   loggerinfor3   r   r   r   r   r    -   s    r    c                   @   sF   e Zd ZdZejZdee de	e
 de	e
 fddZdefdd	Zd
S )SpecDecodingProma  Record spec decoding metrics in Prometheus.

    The acceptance rate can be calculated using a PromQL query:

      rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
      rate(vllm:spec_decode_num_draft_tokens_total[$interval])

    The mean acceptance length (conventionally including bonus tokens)
    can be calculated using:

      1 + (
      rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
      rate(vllm:spec_decode_num_drafts[$interval]))

    A per-position acceptance rate vector can be computed using

      vllm:spec_decode_num_accepted_tokens_per_pos[$interval] /
      vllm:spec_decode_num_drafts[$interval]
    speculative_config
labelnameslabelvaluesc           	      C   s   |d u| _ | j s
d S | jdd|dj| | _| jdd|dj| | _| jdd|dj| | _|d us4J | j r:|jnd}|d	g }| jd
d|d}g | _t|D ]}|t	|g }| j
|j|  qPd S )Nzvllm:spec_decode_num_draftszNumber of spec decoding drafts.)nameZdocumentationr8   z!vllm:spec_decode_num_draft_tokenszNumber of draft tokens.z$vllm:spec_decode_num_accepted_tokenszNumber of accepted tokens.r   positionz,vllm:spec_decode_num_accepted_tokens_per_posz#Accepted tokens per draft position.)spec_decoding_enabled_counter_clslabelscounter_spec_decode_num_drafts$counter_spec_decode_num_draft_tokens'counter_spec_decode_num_accepted_tokensZnum_speculative_tokens/counter_spec_decode_num_accepted_tokens_per_posr   strr'   )	r   r7   r8   r9   r   Zpos_labelnamesZbase_counterposZpos_labelvaluesr   r   r   r$   ~   s\   

zSpecDecodingProm.__init__r&   c                 C   s\   | j sd S | j|j | j|j | j|j t| j	D ]\}}||j
|  qd S r!   )r<   r?   incr	   r@   r
   rA   r   	enumeraterB   r   )r   r&   rD   counterr   r   r   r(      s   zSpecDecodingProm.observeN)r   r   r   r   prometheus_clientCounterr=   r   r   r   rC   r$   r   r(   r   r   r   r   r6   g   s    
*r6   )dataclassesr   r   typingr   numpyr.   rH   Zvllm.configr   Zvllm.loggerr   r   r4   r   r    r6   r   r   r   r   <module>   s   :