o
    )iH%                     @   s   d dl Z d dlmZmZ d dlmZmZ d dlmZ er,d dl	m
Z
mZmZ d dlmZ eG dd dZeG d	d
 d
ZeG dd dZeG dd dZeG dd dZG dd dZG dd dZdS )    N)	dataclassfield)TYPE_CHECKINGOptional)SpecDecodingStats)EngineCoreEventEngineCoreOutputFinishReason)RequestStatec                   @   sB   e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed< dS )	PrefixCacheStatsz#Stores prefix cache hit statistics.Fresetr   requestsquerieshitsN)__name__
__module____qualname____doc__r   bool__annotations__r   intr   r    r   r   a/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/v1/metrics/stats.pyr      s   
 r   c                   @   s|   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed< dZ
eed< eed	Zeed
< dZee ed< dZeed< dS )SchedulerStatsz$Stats associated with the scheduler.r   num_running_reqsnum_waiting_reqsstep_countercurrent_wave        kv_cache_usagedefault_factoryprefix_cache_statsNspec_decoding_statsnum_corrupted_reqs)r   r   r   r   r   r   r   r   r   r   r   floatr   r   r"   r#   r   r   r$   r   r   r   r   r      s   
 r   c                   @   s:   e Zd ZU eedZee ed< eedZee ed< dS )	LoRAStatsr    waiting_requestsrunning_requestsN)	r   r   r   r   setr'   strr   r(   r   r   r   r   r&   2   s   
 r&   c                   @   sZ   e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed< dZeed< dZeed	< d
S )RequestStateStatsz3Stats that need to be tracked across delta updates.r   num_generation_tokensr   arrival_time	queued_tsscheduled_tsfirst_token_tslast_token_tsN)r   r   r   r   r,   r   r   r-   r%   r.   r/   r0   r1   r   r   r   r   r+   8   s   
 r+   c                   @   s~   e Zd ZU dZded< dZeed< dZeed< dZ	eed< d	Z
ee ed
< dZeed< dZeed< dZeed< dZeed< d	S )FinishedRequestStatsz)Stats associated with a finished request.r	   finish_reasonr   e2e_latencyr   num_prompt_tokensr,   Nmax_tokens_paramqueued_timeprefill_timeinference_timedecode_time)r   r   r   r   r   r4   r%   r5   r   r,   r6   r   r7   r8   r9   r:   r   r   r   r   r2   H   s   
 r2   c                   @   s   e Zd ZdZdd ZdedefddZdd	d
ededede	de
e fddZdeded dede	de
e f
ddZdddede
e de	fddZdS )IterationStatsz8Stats associated with a single set of EngineCoreOutputs.c                 C   sJ   t   | _d| _d| _d| _g | _g | _g | _g | _g | _	i | _
i | _d S )Nr   )timeiteration_timestampr,   r5   num_preempted_reqsfinished_requestsZmax_num_generation_tokens_iterZn_params_itertime_to_first_tokens_itertime_per_output_tokens_iterwaiting_lora_adaptersrunning_lora_adaptersselfr   r   r   __init__Z   s   

zIterationStats.__init__startreturnc                 C   s
   | j | S )z=Calculate an interval relative to this iteration's timestamp.)r=   )rE   rG   r   r   r   _time_sinceg   s   
zIterationStats._time_sinceoutputr   engine_core_timestampis_prefilling
prompt_len	req_stats
lora_statsc           
      C   s   t |j}|  j|7  _|r!|  j|7  _| |j}| j| | j|7  _|jd ur8| 	|j
|j||| |r>||_n||j }	| j|	 ||_d S N)lenZnew_token_idsr,   r5   rI   r-   r@   appendeventsupdate_from_events
request_idr0   r1   rA   )
rE   rJ   rK   rL   rM   rN   rO   Znum_new_generation_tokensZfirst_token_latencyZtpotr   r   r   update_from_outputk   s    



z!IterationStats.update_from_outputreq_idrS   r   c                 C   s   ddl m} |D ]@}|j|jkr|j|_|d ur|j| q|j|jkr5|j	dkr.|j|_	t
|| q|j|jkrH|  jd7  _t
|| qd S )Nr   )EngineCoreEventTyper      )vllm.v1.enginerX   typeZQUEUED	timestampr.   r'   addZ	SCHEDULEDr/   LoRARequestStatesscheduled_requestZ	PREEMPTEDr>   preempted_request)rE   rW   rS   rL   rN   rO   rX   eventr   r   r   rT      s    
z!IterationStats.update_from_eventsr3   r	   r5   r6   c                 C   sh   |  |j}|j|j }|j|j }|j|j }|j|j }	t||||j||||	|d	}
| j	|
 d S )N)	r3   r4   r5   r,   r6   r7   r8   r9   r:   )
rI   r-   r/   r.   r0   r1   r2   r,   r?   rR   )rE   r3   r5   r6   rN   r4   r7   r8   r:   r9   Zfinished_reqr   r   r   update_from_finished_request   s"   
z+IterationStats.update_from_finished_requestN)r   r   r   r   rF   r%   rI   r   r   r+   r   r&   rV   r*   listrT   rb   r   r   r   r   r;   W   s:    

r;   c                   @   s   e Zd ZdZdd Zdddee fddZdd	d
ZdddZ	dddZ
edee defddZedee defddZdee fddZdS )r^   zPer-LoRA request state stats.c                 C   s
   i | _ d S rP   )lora_name_to_statsrD   r   r   r   rF      s   
zLoRARequestStates.__init__	req_stater
   rH   c                 C   s4   |j d u rd S |j | jvrt | j|j < | j|j  S rP   )	lora_namerd   r&   )rE   re   r   r   r   	get_stats   s
   
zLoRARequestStates.get_statsc                 C   s(   |  | }d ur|j|j d S d S rP   )rg   r'   r]   rU   rE   re   rO   r   r   r   add_request   s   zLoRARequestStates.add_requestc                 C   s,   |j d u rd S | j|j  }|j|j d S rP   )rf   rd   r(   removerU   rh   r   r   r   finish_request   s   
z LoRARequestStates.finish_requestc                 C   s:   |j d u rd S | j|j  }|j|j |j|j d S rP   )rf   rd   r'   discardrU   r(   rh   r   r   r   abort_request   s
   
zLoRARequestStates.abort_requestrO   rU   c                 C   (   | d u rd S | j | | j| d S rP   )r'   rj   r(   r]   rO   rU   r   r   r   r_         z#LoRARequestStates.scheduled_requestc                 C   rn   rP   )r(   rj   r'   r]   ro   r   r   r   r`      rp   z#LoRARequestStates.preempted_requestiteration_statsc                 C   sP   |d u rd S | j  D ]\}}|jrt|j|j|< |jr%t|j|j|< qd S rP   )rd   itemsr'   rQ   rB   r(   rC   )rE   rq   rf   statsr   r   r   update_iteration_stats   s   z(LoRARequestStates.update_iteration_statsN)re   r
   )r   r   r   r   rF   r   r&   rg   ri   rk   rm   staticmethodr*   r_   r`   r;   rt   r   r   r   r   r^      s    


	r^   )r<   dataclassesr   r   typingr   r   Zvllm.v1.spec_decode.metricsr   rZ   r   r   r	   Zvllm.v1.engine.output_processorr
   r   r   r&   r+   r2   r;   r^   r   r   r   r   <module>   s&   e