o
    )iI                     @   s  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlZd dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2m3Z3 d dl4m5Z6 e0e7Z8de2fddZ9dd Z:dZ;G dd dZ<de=de=fdd Z>d!e=d"e?e ddfd#d$Z@d%e=d&e=d'eAddfd(d)ZBde=d"e?e d*e=ddfd+d,ZCd-ed.e=defd/d0ZDd-ed.e=defd1d2ZEd3ed-ed4e<defd5d6ZFd7ed8ed9eddfd:d;ZGd9efd<d=ZHe7d>krSe: ZIe8Jd?e6 e8Jd@eI eIjKrEe8JdA eeIjLeIjMdB ne8JdC e NeHeI dS dS )D    N)	Namespace)	Awaitable)
HTTPStatus)StringIO)CallableOptional)start_http_server)tqdm)
VllmConfig)AsyncEngineArgsoptional_type)EngineClient)RequestLogger)BatchRequestInputBatchRequestOutputBatchResponseDataChatCompletionResponseEmbeddingResponseErrorResponseRerankResponseScoreResponse)OpenAIServingChat)OpenAIServingEmbedding)BaseModelPathOpenAIServingModels)ServingScores)init_logger)FlexibleArgumentParserrandom_uuid)__version__parserc                 C   s   | j dddtdd | j dddtdd | j d	td d
d | j dttddd t| } | j dtd dd | j dddd | j dtddd | j dtddd | j ddddd | S )Nz-iz--input-fileTzThe path or url to a single input file. Currently supports local file paths, or the http protocol (http or https). If a URL is specified, the file should be available via HTTP GET.)requiredtypehelpz-oz--output-filezThe path or url to a single output file. Currently supports local file paths, or web (http or https) urls. If a URL is specified, the file should be available via HTTP PUT.z--output-tmp-dirzMThe directory to store the output file before uploading it to the output URL.)r"   defaultr#   z--response-roleZ	assistantz@The role name to return if `request.add_generation_prompt=True`.z--max-log-lenz^Max number of prompt characters or prompt ID numbers being printed in log.

Default: Unlimitedz--enable-metrics
store_truezEnable Prometheus metrics)actionr#   z--urlz0.0.0.0zLURL to the Prometheus metrics server (only needed if enable-metrics is set).z--porti@  zUPort number for the Prometheus metrics server (only needed if enable-metrics is set).z--enable-prompt-tokens-detailsFz6If set to True, enable prompt_tokens_details in usage.)r&   r$   r#   )add_argumentstrr   r   Zadd_cli_argsintr     r+   m/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/openai/run_batch.pymake_arg_parser*   sl   	
r-   c                  C   s   t dd} t|  S )Nz$vLLM OpenAI-Compatible batch runner.)description)r   r-   
parse_argsr*   r+   r+   r,   r/   l   s   r/   z_{desc}: {percentage:3.0f}% Completed | {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]
c                   @   s2   e Zd Zdd Zdd Zdd Zdefdd	Zd
S )BatchProgressTrackerc                 C   s   d| _ d | _d S )Nr   )_total_pbarselfr+   r+   r,   __init__{   s   
zBatchProgressTracker.__init__c                 C   s   |  j d7  _ d S )N   )r1   r3   r+   r+   r,   	submitted   s   zBatchProgressTracker.submittedc                 C   s   | j r
| j   d S d S N)r2   updater3   r+   r+   r,   	completed   s   zBatchProgressTracker.completedreturnc                 C   s:   t j  pt j dk}t| jddd| td| _| jS )Nr   reqzRunning batch   )totalunitZdescZminintervaldisableZ
bar_format)torchdistributedZis_initializedZget_rankr	   r1   _BAR_FORMATr2   )r4   Zenable_tqdmr+   r+   r,   pbar   s   zBatchProgressTracker.pbarN)__name__
__module____qualname__r5   r7   r:   r	   rD   r+   r+   r+   r,   r0   y   s
    r0   path_or_urlr;   c              
      s   |  ds|  dr\t 4 I d H <}|| 4 I d H }| I d H W  d   I d H  W  d   I d H  S 1 I d H s?w   Y  W d   I d H  d S 1 I d H sUw   Y  d S t| dd}| W  d    S 1 sqw   Y  d S )Nhttp://https://utf-8encoding)
startswithaiohttpClientSessiongettextopenread)rH   sessionrespfr+   r+   r,   	read_file   s   
p$rX   output_pathbatch_outputsc                    sP   t | ddd}|D ]
}t| |d qW d   dS 1 s!w   Y  dS )z
    Write the responses to a local file.
    output_path: The path to write the responses to.
    batch_outputs: The list of batch outputs to write.
    wrK   rL   fileN)rS   printmodel_dump_json)rY   rZ   rW   or+   r+   r,   write_local_file   s   	"ra   
output_urldata_or_file	from_filec           
         s  d}d}t d|d D ]}ztjtjddd4 I dH }|rit|d;}|j| |d4 I dH }|jd	krDtd
|j d|  W d  I dH  n1 I dH sTw   Y  W d   n1 scw   Y  n3|j| |d4 I dH }|jd	krtd|j d|  W d  I dH  n1 I dH sw   Y  W d  I dH  n1 I dH sw   Y  W q ty }	 z)||k rt	
d||	| t|I dH  ntd| dt|	 d|	W Y d}	~	qd}	~	ww dS )z
    Upload a local file to a URL.
    output_url: The URL to upload the file to.
    data_or_file: Either the data to upload or the path to the file to upload.
    from_file: If True, data_or_file is the path to the file to upload.
    r=   r6   i  )r>   )timeoutNrb)data   zFailed to upload file.
Status: z
Response: zFailed to upload data.
Status: zPFailed to upload data (attempt %d). Error message: %s.
Retrying in %d seconds...zFailed to upload data (attempt z). Error message: .)rangerO   rP   ZClientTimeoutrS   putstatus	ExceptionrR   loggererrorasynciosleepr(   )
rb   rc   rd   max_retriesdelayattemptrU   r]   responseer+   r+   r,   upload_data   sp   


(

*(rw   output_tmp_dirc                    s   |  ds|  dr|du rBtd t }|D ]
}t| |d q|d td|  t| | 	 
dd	d
I dH  dS tjdd|ddd*}td|j t|j|I dH  td|  t| |jdd
I dH  W d   dS 1 syw   Y  dS td|  t| |I dH  dS )a  
    Write batch_outputs to a file or upload to a URL.
    path_or_url: The path or URL to write batch_outputs to.
    batch_outputs: The list of batch outputs to write.
    output_tmp_dir: The directory to store the output file before uploading it
    to the output URL.
    rI   rJ   Nz Writing outputs to memory bufferr\   r   zUploading outputs to %srK   F)rd   r[   Ztmp_batch_output_z.jsonl)moderM   dirprefixsuffixz*Writing outputs to temporary local file %sTz Writing outputs to local file %s)rN   rn   infor   r^   r_   seekrw   rT   stripencodetempfileNamedTemporaryFilenamera   )rH   rZ   rx   Zoutput_bufferr`   rW   r+   r+   r,   
write_file   s>   	

"r   request	error_msgc                 C   s0   t dt  | jttjdt  d|d}|S )Nvllm-vllm-batch-status_code
request_idid	custom_idru   ro   )r   r   r   r   r   BAD_REQUEST)r   r   batch_outputr+   r+   r,   make_error_request_output  s   

	r   c                    s   t | |S r8   )r   )r   r   r+   r+   r,   make_async_error_request_output  s   
r   serving_engine_functrackerc                    s   | |j I d H }t|ttttfr(tdt  |jt	|dt  dd d}n#t|t
rEtdt  |jt	|jjdt  d|d}nt|dd}|  |S )Nr   r   )bodyr   r   r   z'Request must not be sent in stream moder   )r   
isinstancer   r   r   r   r   r   r   r   r   ro   coder   r:   )r   r   r   ru   r   r+   r+   r,   run_request  s>   



	r   engine_clientvllm_configargsc              
      s&   j d ur
 j }n jg} jrt jd}nd } fdd|D }|j}tjr1|  I d H }n|j	}t
d| t| ||d d}d|v rTt| || j|d d jdnd }	d	|v rdt| |||d dd
nd }
d|v ort|jdddk}d	|v sy|rt| |||dnd }t }t
d j g }t jI d H  dD ]}| }|sqt|}|jdkr|	d ur|	jnd }|d u r|t|dd q|t||| |   q|jdkr|
d ur|
j!nd }|d u r|t|dd q|t||| |   q|j"dr,|d ur|j#nd }|d u r|t|dd q|t||| |   q|j"drZ|d ur;|j$nd }|d u rL|t|dd q|t||| |   q|t|d|j dd q|%  t&j'| I d H }W d    n	1 sw   Y  t( j)| j*I d H  d S )N)max_log_lenc                    s   g | ]	}t | jd qS ))r   Z
model_path)r   model).0r   r   r+   r,   
<listcomp>J  s    zrun_batch.<locals>.<listcomp>zSupported_tasks: %s)r   model_configbase_model_pathsZlora_modulesgenerateauto)request_loggerchat_templatechat_template_content_formatenable_prompt_tokens_detailsZembed)r   r   r   ZclassifyZ
num_labelsr   r6   )r   zReading batch from %s...
z/v1/chat/completionsz/The model does not support Chat Completions APIr   z/v1/embeddingsz)The model does not support Embeddings APIz/scorez%The model does not support Scores APIz/rerankz%The model does not support Rerank APIzURL z was used. Supported endpoints: /v1/chat/completions, /v1/embeddings, /score, /rerank .See vllm/entrypoints/openai/api_server.py for supported score/rerank versions.)+Zserved_model_namer   Zenable_log_requestsr   r   r   envsZVLLM_USE_V1Zget_supported_taskssupported_tasksrn   r}   r   r   Zresponse_roler   r   getattrZ	hf_configr   r0   Z
input_filerX   r   splitr   Zmodel_validate_jsonurlZcreate_chat_completionappendr   r   r7   Zcreate_embeddingendswithZcreate_scoreZ	do_rerankrD   rp   gatherr   Zoutput_filerx   )r   r   r   Zserved_model_namesr   r   r   r   Zopenai_serving_modelsZopenai_serving_chatZopenai_serving_embeddingZenable_serving_rerankingZopenai_serving_scoresr   Zresponse_futuresZrequest_jsonr   Zchat_handler_fnZembed_handler_fnZscore_handler_fnZrerank_handler_fn	responsesr+   r   r,   	run_batch;  s&  


	
	
















r   c              	      s   ddl m} ddlm} || |jdd4 I d H }| I d H }t||| I d H  W d   I d H  d S 1 I d H s;w   Y  d S )Nr   )build_async_engine_client)UsageContextF)Zusage_contextZ disable_frontend_multiprocessing)Z"vllm.entrypoints.openai.api_serverr   Zvllm.usage.usage_libr   ZOPENAI_BATCH_RUNNERZget_vllm_configr   )r   r   r   r   r   r+   r+   r,   main  s   .r   __main__z$vLLM batch processing API version %szargs: %szPrometheus metrics enabled)portaddrzPrometheus metrics disabled)Orp   r   argparser   collections.abcr   httpr   ior   typingr   r   rO   rA   Zprometheus_clientr   r	   Z	vllm.envsr   Zvllm.configr
   Zvllm.engine.arg_utilsr   r   Zvllm.engine.protocolr   Zvllm.entrypoints.loggerr   Z vllm.entrypoints.openai.protocolr   r   r   r   r   r   r   r   Z$vllm.entrypoints.openai.serving_chatr   Z)vllm.entrypoints.openai.serving_embeddingr   Z&vllm.entrypoints.openai.serving_modelsr   r   Z%vllm.entrypoints.openai.serving_scorer   Zvllm.loggerr   Z
vllm.utilsr   r   Zvllm.versionr   ZVLLM_VERSIONrE   rn   r-   r/   rC   r0   r(   rX   listra   boolrw   r   r   r   r   r   r   r   r}   Zenable_metricsr   r   runr+   r+   r+   r,   <module>   s   (B




3
)


"
 


