o
    o iL                  	   @   s  U d dl Z d dlZd dlmZ d dlZd dlmZmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ dZdZe ZdZdZedde
fddZe ddede
fddZ!e"dkre # Z$e$j%de&dd e$j%de'dd e$j%de&dd e$j%de&dd e(e$Z$e$) Z*e+e*Z,ee-d< e.e,ZeejjjdZej/ee*j0e*j1dee*j2e*j3d dS dS )     N)AsyncGenerator)FastAPIRequest)JSONResponseResponseStreamingResponse)AsyncEngineArgs)AsyncLLMEngine)SamplingParams)random_uuid)adapt_tokenizer)JSONLogitsProcessorRegexLogitsProcessor      z/healthreturnc                      s   t ddS )zHealth check.   status_code)r    r   r   `/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/outlines/serve/serve.pyhealth0   s   
r   z	/generaterequestc                    sJ  t dusJ |  I dH }|d |dd}|dd}|dd}|dur0t|tg}n|dur;t|tg}ng }tdi |d|i}t }t  ||dt	t
df ffd	d
}|rft| S d}	2 z3 dH W }
|  I dH rt |I dH  tdd  S |
}	qj6 |	dusJ |	j  fdd|	jD }d|i}t|S )a  Generate completion for the request.

    The request should be a JSON object with the following fields:
    - prompt: the prompt to use for the generation.
    - schema: the JSON schema to use for the generation (if regex is not provided).
    - regex: the regex to use for the generation (if schema is not provided).
    - stream: whether to stream the results or not.
    - other fields: the sampling parameters (See `SamplingParams` for details).
    NpromptstreamFZschemaregexlogits_processorsr   c                    sR   2 z"3 d H W } | j   fdd| jD }d|i}t|d dV  q6 d S )Nc                       g | ]} |j  qS r   text.0outputr   r   r   
<listcomp>[       z4generate.<locals>.stream_results.<locals>.<listcomp>r    zutf-8)r   outputsjsondumpsencode)request_outputtext_outputsret)results_generatorr#   r   stream_resultsX   s   z generate.<locals>.stream_resultsi  r   c                    r   r   r   r    r#   r   r   r$   m   r%   zgenerate.<locals>.<listcomp>r   r   )enginer(   popr   	tokenizerr   r
   r   generater   bytesr   Zis_disconnectedabortr   r   r'   r   )r   Zrequest_dictr   Zjson_schemaZregex_stringr   Zsampling_paramsZ
request_idr/   Zfinal_outputr+   r,   r-   r   )r   r.   r   r3   6   sD   

r3   __main__z--host)typedefaultz--porti@  z--ssl-keyfilez--ssl-certfileengine_args)r2   debug)hostport	log_levelZtimeout_keep_alivessl_keyfilessl_certfile)4argparser(   typingr   ZuvicornZfastapir   r   Zfastapi.responsesr   r   r   Zvllm.engine.arg_utilsr   Zvllm.engine.async_llm_enginer	   Zvllm.sampling_paramsr
   Z
vllm.utilsr   Zoutlines.models.vllmr   Zoutlines.processorsr   r   ZTIMEOUT_KEEP_ALIVEZTIMEOUT_TO_PREVENT_DEADLOCKappr0   r2   getr   postr3   __name__ArgumentParserparseradd_argumentstrintZadd_cli_args
parse_argsargsZfrom_cli_argsr9   __annotations__Zfrom_engine_argsrunr;   r<   r>   r?   r   r   r   r   <module>   sT   
;


