o
    )i                  	   @   sr  d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
 ddlmZmZ ddlmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddl m!Z! ddl"m#Z#m$Z$m%Z% ddl&m'Z( edZ)e a*da+t*,ddefddZ-t*.ddedefddZ/ede0dedefddZ1dedefdd Z2	d<ded!e
e defd"d#Z3	d<ded!e
e d$e	ddfd%d&Z4e5d'kr7e# Z6e6j7d(e8dd) e6j7d*e6j9d+d) e6j7d,e8dd) e6j7d-e8dd) e6j7d.e8dd/d0 e6j7d1d2d3d4d5 e6j7d6e:e:ej;d7d0 e6j7d8e8dd9d0 e6j7d:e8d;d) e<e6Z6e6= Z>e?e4e> dS dS )=aR  
NOTE: This API server is used only for demonstrating usage of AsyncEngine
and simple performance benchmarks. It is not intended for production use.
For production use, we recommend using our OpenAI compatible server.
We are also not going to accept PRs modifying this file, please
change `vllm/entrypoints/openai/api_server.py` instead.
    N)	Namespace)AsyncGenerator)AnyOptional)FastAPIRequest)JSONResponseResponseStreamingResponse)AsyncEngineArgs)AsyncLLMEngine)
serve_http)with_cancellation)init_logger)SamplingParams)UsageContext)FlexibleArgumentParserrandom_uuid
set_ulimit)__version__zvllm.entrypoints.api_serverz/healthreturnc                      s   t ddS )zHealth check.   status_code)r	    r   r   g/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/api_server.pyhealth%   s   
r   z	/generaterequestc                    s"   |   I dH }t|| dI dH S )a%  Generate completion for the request.

    The request should be a JSON object with the following fields:
    - prompt: the prompt to use for the generation.
    - stream: whether to stream the results or not.
    - other fields: the sampling parameters (See `SamplingParams` for details).
    N)raw_request)json	_generate)r   request_dictr   r   r   generate+   s   	r"   r!   r   c           
         s   |  d |  dd}tdi | }t }td usJ t ||dttd f ffdd}|r7t| S d }z2 z3 d H W }|}q<6 W n tj	yV   t
dd Y S w |d us]J |j  d usfJ  fd	d
|jD }d|i}	t|	S )NpromptstreamFr   c                    s^   2 z(3 d H W } | j   d usJ  fdd| jD }d|i}t|d dV  q6 d S )Nc                       g | ]} |j  qS r   text.0outputr#   r   r   
<listcomp>G   s    
z5_generate.<locals>.stream_results.<locals>.<listcomp>r'   
zutf-8)r#   outputsr   dumpsencode)request_outputtext_outputsret)results_generatorr+   r   stream_resultsC   s   
z!_generate.<locals>.stream_resultsi  r   c                    r%   r   r&   r(   r+   r   r   r,   [   s    z_generate.<locals>.<listcomp>r'   r   )popr   r   enginer"   r   bytesr
   asyncioCancelledErrorr	   r#   r.   r   )
r!   r   r$   Zsampling_paramsZ
request_idr5   Zfinal_outputr1   r2   r3   r   )r#   r4   r   r    8   s0   


r    argsc                 C   s   | j t_ tS N)	root_pathapp)r;   r   r   r   	build_app`   s   r?   
llm_enginec                    s<   t | }t| }|d ur|ntj|tjdat|j_	|S )N)Zusage_context)
r?   r   Zfrom_cli_argsr   Zfrom_engine_argsr   Z
API_SERVERr7   stateZengine_client)r;   r@   r>   Zengine_argsr   r   r   init_appg   s   
rB   uvicorn_kwargsc                    s   t dt t d|  t  t| |I d H }td usJ t|fd | j| j| j	| j
tj| j| j| j| jd
|I d H }|I d H  d S )NzvLLM API server version %szargs: %s)
sockenable_ssl_refreshhostport	log_levelZtimeout_keep_alivessl_keyfilessl_certfilessl_ca_certsssl_cert_reqs)loggerinfoVLLM_VERSIONr   rB   r7   r   rE   rF   rG   rH   envsZVLLM_HTTP_TIMEOUT_KEEP_ALIVErI   rJ   rK   rL   )r;   r@   rC   r>   Zshutdown_taskr   r   r   
run_serverw   s.   rQ   __main__z--host)typedefaultz--porti@  z--ssl-keyfilez--ssl-certfilez--ssl-ca-certszThe CA certificates file)rS   rT   helpz--enable-ssl-refresh
store_trueFz5Refresh SSL Context when SSL certificate files change)actionrT   rU   z--ssl-cert-reqsz@Whether client certificate is required (see stdlib ssl module's)z--root-pathz?FastAPI root_path when app is behind a path based routing proxyz--log-leveldebugr<   )@__doc__r9   r   sslargparser   collections.abcr   typingr   r   Zfastapir   r   Zfastapi.responsesr   r	   r
   Z	vllm.envsrP   Zvllm.engine.arg_utilsr   Zvllm.engine.async_llm_enginer   Zvllm.entrypoints.launcherr   Zvllm.entrypoints.utilsr   Zvllm.loggerr   Zvllm.sampling_paramsr   Zvllm.usage.usage_libr   Z
vllm.utilsr   r   r   Zvllm.versionr   rO   rM   r>   r7   getr   postr"   dictr    r?   rB   rQ   __name__parseradd_argumentstrZ
check_portint	CERT_NONEZadd_cli_args
parse_argsr;   runr   r   r   r   <module>   s   '	



