o
    )i4                  
   @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
 d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ ee Z!dZ"deddfddZ#dd Z$defddZ%dd Z&dd Z'	d,de(de	e( de	e)e*ef  de	e( fddZ+de*fdd Z,d!e j-d"e.e* fd#d$Z/de(de
eef d%e(d&e)de(f
d'd(Z0d)e
e j1ef fd*d+Z2dS )-    N)AnyOptionalUnion)Request)JSONResponseStreamingResponse)BackgroundTaskBackgroundTasks)
EngineArgs)make_arg_parser)ChatCompletionRequestCompletionRequest)init_logger)current_platform)FlexibleArgumentParserac  Tip: Use `vllm [serve|run-batch|bench <bench_type>] --help=<keyword>` to explore arguments from help.
   - To view a argument group:     --help=ModelConfig
   - To view a single argument:    --help=max-num-seqs
   - To search by keyword:         --help=max
   - To list all groups:           --help=listgroup
   - To view help with pager:      --help=pagerequestreturnc                    sT   	 |   I dH }|d dkr)t| jjddr't| jjdr'| jj jd8  _dS q)	z+Returns if a disconnect message is receivedTNtypezhttp.disconnectenable_server_load_trackingFserver_load_metrics   )Zreceivegetattrappstatehasattrr   )r   message r   b/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/entrypoints/utils.pylisten_for_disconnect%   s   
r   c                       t   fdd}|S )a  Decorator that allows a route handler to be cancelled by client
    disconnections.

    This does _not_ use request.is_disconnected, which does not work with
    middleware. Instead this follows the pattern from
    starlette.StreamingResponse, which simultaneously awaits on two tasks- one
    to wait for an http disconnect message, and the other to do the work that we
    want done. When the first task finishes, the other is cancelled.

    A core assumption of this method is that the body of the request has already
    been read. This is a safe assumption to make for fastapi handlers that have
    already parsed the body of the request into a pydantic model for us.
    This decorator is unsafe to use elsewhere, as it will consume and throw away
    all incoming messages for the request while it looks for a disconnect
    message.

    In the case where a `StreamingResponse` is returned by the handler, this
    wrapper will stop listening for disconnects and instead the response object
    will start listening for disconnects.
    c                     s   t | dkr| d n|d }t | i |}tt|}tj||gtjdI d H \}}|D ]}|  q1||v r@| S d S )Nr   raw_request)return_when)lenasynciocreate_taskr   waitFIRST_COMPLETEDcancelresult)argskwargsr   Zhandler_taskZcancellation_taskdonependingtaskhandler_funcr   r   wrapperL   s   

z"with_cancellation.<locals>.wrapper	functoolswraps)r/   r0   r   r.   r   with_cancellation4   s   r4   c                 C   s   | j j jd8  _d S )Nr   )r   r   r   )r   r   r   r   decrement_server_loada   s   r5   c                    r   )Nc                     sd  | dt| dkr| d nd }|d u rtdt|jjdds+ | i |I d H S t|jjds7d|jj_|jj jd7  _z | i |I d H }W n ty]   |jj jd8  _ w t	|t
tfr|jd u rrtt||_|S t	|jtr|jt| |S t	|jtrt }|j|jjg|jjR i |jj |t| ||_|S |jj jd8  _|S )Nr    r   z9raw_request required when server load tracking is enabledr   Fr   r   )getr"   
ValueErrorr   r   r   r   r   	Exception
isinstancer   r   
backgroundr   r5   r	   add_taskfuncr)   r*   )r)   r*   r    responsetasksr<   r   r   r0   g   sX   


z load_aware_call.<locals>.wrapperr1   )r<   r0   r   r?   r   load_aware_calle   s   ,r@   c                   C   s&   dt jvrtd dt jd< d S d S )NZVLLM_WORKER_MULTIPROC_METHODz/Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'Zspawn)osenvironloggerdebugr   r   r   r   cli_env_setup   s   

rE   max_model_lentruncate_prompt_tokenstokenization_kwargsc                 C   sb   |d ur'|dkr
| }|| krt d| d|  d|d ur%d|d< ||d< |S |d ur/d|d< |S )	Nztruncate_prompt_tokens value (z!) is greater than max_model_len (z,). Please, select a smaller truncation size.TZ
truncation
max_lengthF)r7   )rF   rG   rH   r   r   r   _validate_truncation_size   s    rK   textc                 C   sb   ddg}|D ]$}zt j| t jdd}|j| d W  dS  t jttfy*   Y qw t|  dS )z>Output text using scrolling view if available and appropriate.zless -RmoreT)stdinrL   )inputN)	
subprocessPopensplitPIPEcommunicateSubprocessErrorOSErrorFileNotFoundErrorprint)rL   ZpagersZ	pager_cmdprocr   r   r   _output_with_pager   s   
rZ   parsersubcommand_namec           
         s   t tjt |kstjddt |  |krd S tjD ]}|dr|ddd   dkr;|  }t| td  dkrxdg}| jD ]&}|j	rk|j	dsk|
d	|j	  |jrf|
d
|j   |
d qEtd| td |  }| jD ]/}|j	r|j	   kr||j	 ||j ||j |  t|  td qg }| jD ]}|jD ]}t fdd|jD r|
| qq|rd  d}	|  }|| t|	|   td td  d td td qd S )Nr   z--help==pager   Z	listgroupz
Available argument groups:zpositional argumentsz  - z     
c                 3   s     | ]}   |  v V  qd S N)lower).0optZsearch_keywordr   r   	<genexpr>  s    z<show_filtered_argument_or_group_from_help.<locals>.<genexpr>z
Parameters matching 'z':
z!
No group or parameter matching ''z/Tip: use `--help=listgroup` to view all groups.)r"   sysargv
startswithrR   format_helprZ   exit_action_groupstitleappenddescriptionstripjoin_get_formatterrb   start_sectionadd_textadd_arguments_group_actionsend_sectionanyoption_stringsrX   )
r[   r\   arg	help_textoutput_linesgroup	formatterZmatched_actionsactionheaderr   re   r   )show_filtered_argument_or_group_from_help   st   














r   input_lengthdefault_sampling_paramsc                 C   sD   t |dd p|j}| | }t|}tdd ||||dfD S )NZmax_completion_tokensc                 s   s    | ]	}|d ur|V  qd S ra   r   )rc   valr   r   r   rf   *  s    z!get_max_tokens.<locals>.<genexpr>
max_tokens)r   r   r   Zget_max_output_tokensminr6   )rF   r   r   r   r   Zdefault_max_tokensZmax_output_tokensr   r   r   get_max_tokens!  s   
r   r)   c           	      C   s   i }t | tjr*tt }t|g  D ]\}}|t| |kr(t| |||< qn*t | t	rPt	 }t
| D ]}t| |j}t||j}||krN|||j< q7ntdtd| d S )NzMUnsupported argument type. Must be argparse.Namespace or EngineArgs instance.znon-default args: %s)r9   argparse	Namespacer   r   vars
parse_argsitemsr   r
   dataclassesfieldsname	TypeErrorrC   info)	r)   Znon_default_argsr[   r{   defaultZdefault_argsfieldZcurrent_valZdefault_valr   r   r   log_non_default_args0  s&   


r   ra   )3r   r#   r   r2   rA   rP   rh   typingr   r   r   Zfastapir   Zfastapi.responsesr   r   Zstarlette.backgroundr   r	   Zvllm.engine.arg_utilsr
   Z vllm.entrypoints.openai.cli_argsr   Z vllm.entrypoints.openai.protocolr   r   Zvllm.loggerr   Zvllm.platformsr   Z
vllm.utilsr   __name__rC   ZVLLM_SUBCMD_PARSER_EPILOGr   r4   r5   r@   rE   intdictstrrK   rZ   ArgumentParserlistr   r   r   r   r   r   r   r   <module>   sb   
-2


H
