o
    )i                     @   s   d Z ddlZddlZddlZddlZddlZddlmZmZ ddl	Z
ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ d	ejd
eeef ddfddZdejfddZd	ejfddZdS )z?Benchmark the latency of processing a single batch of requests.    N)AnyOptional)tqdm)#convert_to_pytorch_benchmark_formatwrite_to_json)
EngineArgs)
PromptType)BeamSearchParamsargsresultsreturnc                    sR   t | d d i fdddD d}|r'tj| jd  d}t|| d S d S )	Nlatency	latenciesc                    s   i | ]}| | qS  r   ).0kr   r   c/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/benchmarks/latency.py
<dictcomp>   s    z4save_to_pytorch_benchmark_format.<locals>.<dictcomp>)avg_latencypercentiles)r
   ZmetricsZ
extra_infor   z.pytorch.json)r   ospathsplitextoutput_jsonr   )r
   r   Z
pt_recordsZpt_filer   r   r    save_to_pytorch_benchmark_format   s   

r   parserc                 C   s   | j dtdd | j dtdd | j dtdd | j dtd	d
d | j ddd | j dtddd | j dtddd | j dddd | j dtd dd | j dddd t| } | jdd d S )Nz--input-len    )typedefaultz--output-len   z--batch-size   z--n   z)Number of generated sequences per prompt.)r   r   helpz--use-beam-search
store_true)actionz--num-iters-warmup
   z'Number of iterations to run for warmup.z--num-iters   zNumber of iterations to run.z	--profilez0profile the generation process of a single batch)r%   r#   z--output-jsonz0Path to save the latency results in JSON format.z--disable-detokenizez`Do not detokenize responses (i.e. do not include detokenization time in the latency measurement)F)Zenable_prefix_caching)add_argumentintstrr   add_cli_argsset_defaults)r   r   r   r   r+   #   sN   
r+   c                    s*   j r
tjs
tdt }ddlm}m} |d i t	
|jjj j j ks0J d| jddd j j dtjjd j jfd	}d
d | D  fddd!dtt ffdd}td tt jddD ]}|d d qv j rtj}td| d ||d d S g }tt jddD ]
}||d d qt|}g d}	t ||	}
tdt!| d t"|	|
D ]\}}t| d| d qȈ j#rt!|| t$t"|	|
 d}t% j#d}t&j'||dd W d    n	1 sw   Y  t( | d S d S )"NzsThe environment variable 'VLLM_TORCH_PROFILER_DIR' is not set. Please set it to a valid path to use torch profiler.r   )LLMSamplingParamszUPlease ensure that max_model_len is greater than the sum of input_len and output_len.g      ?T)nZtemperatureZtop_p
ignore_eos
max_tokensZ
detokenizei'  )sizec                 S   s   g | ]}d |iqS )Zprompt_token_idsr   )r   batchr   r   r   
<listcomp>m   s    zmain.<locals>.<listcomp>c                      s8    j sjdd d S t j jdd d S )NF)sampling_paramsZuse_tqdmT)Z
beam_widthr1   r0   )Zuse_beam_searchgenerateZbeam_searchr	   r/   
output_lenr   )r
   dummy_promptsllmr5   r   r   llm_generateq   s   
zmain.<locals>.llm_generateprofile_dirc                    s@   | r         d S t }  t }|| }|S N)Zstart_profileZstop_profiletimeperf_counter)r;   
start_timeend_timer   )r9   r:   r   r   run_to_completion   s   zmain.<locals>.run_to_completionzWarming up...zWarmup iterations)Zdesc)r;   z%Profiling (results will be saved to 'z')...zProfiling iterations)r&      2   K   Z   c   zAvg latency: z secondsz% percentile latency: )r   r   r   w   )indentr   r<   ))ZprofileenvsZVLLM_TORCH_PROFILER_DIROSErrorr   Zfrom_cli_argsZvllmr-   r.   dataclassesasdictZ
llm_engineZmodel_configZmax_model_lenZ	input_lenr7   r/   Zdisable_detokenizenprandomrandintZ
batch_sizetolistr   r*   printr   rangeZnum_iters_warmupZ	num_itersappendarray
percentilemeanzipr   dictopenjsondumpr   )r
   Zengine_argsr-   r.   Zdummy_prompt_token_idsrA   _r;   r   Zpercentagesr   
percentagerV   r   fr   )r
   r8   r9   r:   r5   r   mainP   sv   


r`   )__doc__argparserL   r[   r   r=   typingr   r   numpyrN   r   Z	vllm.envsrJ   Zvllm.benchmarks.lib.utilsr   r   Zvllm.engine.arg_utilsr   Zvllm.inputsr   Zvllm.sampling_paramsr	   	NamespacerY   r*   r   ArgumentParserr+   r`   r   r   r   r   <module>   s*   

-