o
    )il4                     @   s   d dl Z d dlmZ d dlmZmZmZmZmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ erKd d
lmZ neZeeZed Zed ZeeG dd dZdS )    N)field)TYPE_CHECKINGAnyLiteralOptionalUnion)SkipValidationmodel_validator)	dataclass)Self)config)init_logger)DEFAULT_MAX_NUM_BATCHED_TOKENS'MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS$POOLING_MODEL_MAX_NUM_BATCHED_TOKENS)
RunnerType)ZswapZ	recompute)fcfspriorityc                   @   s  e Zd ZU dZdZeed< 	 dZee	 ed< 	 dZ
ee	 ed< 	 dZee	 ed< 	 dZe	ed	< 	 dZe	ed
< 	 dZe	ed< 	 dZe	ed< 	 eedZee	 ed< 	 dZeed< 	 dZee ed< 	 dZeed< 	 eddZe	ed< 	 eddZe	ed< 	 dZee ed< 	 dZeed< 	 dZeed< 	 eddZ eed< 	 dZ!eed< 	 dZ"e#e$e%e& f ed< 	 dZ'eed < 	 dZ(eed!< 	 d"e$fd#d$Z)d+d%d&Z*e+d'd(d"e,fd)d*Z-dS ),SchedulerConfigzScheduler configuration.generaterunner_typeNmax_num_batched_tokensmax_num_seqsmax_model_len   max_num_partial_prefillsmax_long_partial_prefillsr   long_prefill_token_thresholdnum_lookahead_slots)default_factorycuda_graph_sizesg        delay_factorenable_chunked_prefillFis_multimodal_model)initmax_num_encoder_input_tokensencoder_cache_sizepreemption_modesend_delta_datar   policychunked_prefill_enableddisable_chunked_mm_inputzvllm.core.scheduler.Schedulerscheduler_clsdisable_hybrid_kv_cache_managerasync_schedulingreturnc                 C   s"   g }t jt| dd }|S )a  
        WARNING: Whenever a new field is added to this config,
        ensure that it is included in the factors list if
        it affects the computation graph.

        Provide a hash that uniquely identifies all the configs
        that affect the structure of the computation
        graph from input ids/embeddings to the final hidden states,
        excluding anything before input ids/embeddings and after
        the final hidden states.
        F)usedforsecurity)hashlibmd5strencode	hexdigest)selfZfactorsZhash_str r7   a/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/config/scheduler.pycompute_hash   s   zSchedulerConfig.compute_hashc                 C   s  | j d u rd| _ | jd u rd| _| jd u rD| jrt| _nt| j t| _| jdkr/t| jt| _| jr9t| jt	| _t
| j| j  | j| _| j| _| j| _| jrVtd| j | j| _| jdkrw| jdkrlt| j d | _td| j| j| j | jst
| jd	 d
g| _| jrd| _d S d S )Ni       Zpoolingz:Chunked prefill is enabled with max_num_batched_tokens=%d.r   r   g{Gz?zConcurrent partial prefills enabled with max_num_partial_prefills=%d, max_long_partial_prefills=%d, long_prefill_token_threshold=%d   i   z1vllm.v1.core.sched.async_scheduler.AsyncScheduler)r   r   r   r"   r   maxr   r   r#   r   minr%   r&   loggerinfor*   r   r   intr   r    r.   r,   r6   r7   r7   r8   __post_init__   s`   






zSchedulerConfig.__post_init__after)modec                 C   s&  | j | jk r| jstd| j  d| j d| j | jk r)td| j  d| j d| j | j| j kr>td| j | j| j  | jdk rLtd| j d	| jd
k rZtd| j d| jd
kry| jsftd| j	| jkrytd| j	 d| j d| j
d
k s| j
| jkrtd| j
 d| j d| S )Nzmax_num_batched_tokens (z!) is smaller than max_model_len (z). This effectively limits the maximum sequence length to max_num_batched_tokens and makes vLLM reject longer sequences. Please increase max_num_batched_tokens or decrease max_model_len.z1) must be greater than or equal to max_num_seqs (z).zlmax_num_batched_tokens (%d) exceeds max_num_seqs * max_model_len (%d). This may lead to unexpected behavior.r   znum_lookahead_slots (z%) must be greater than or equal to 0.r   zmax_num_partial_prefills (z%) must be greater than or equal to 1.zDChunked prefill must be enabled to set max_num_partial_prefills > 1.zlong_prefill_token_threshold (z,) cannot be greater than the max_model_len (zmax_long_partial_prefills (zY) must be greater than or equal to 1 and less than or equal to max_num_partial_prefills ()r   r   r*   
ValueErrorr   r>   warningr   r   r   r   rA   r7   r7   r8   _verify_args   sj   






zSchedulerConfig._verify_args)r/   N).__name__
__module____qualname____doc__r   r   __annotations__r   r   r@   r   r   r   r   r   r   r   listr    r!   floatr"   boolr#   r%   r&   r'   r   PreemptionModer(   r)   SchedulerPolicyr*   r+   r,   r   r3   typeobjectr-   r.   r9   rB   r	   r   rG   r7   r7   r7   r8   r      sd   
 	
Er   )r1   dataclassesr   typingr   r   r   r   r   Zpydanticr   r	   Zpydantic.dataclassesr
   Ztyping_extensionsr   Zvllm.config.utilsr   Zvllm.loggerr   Z
vllm.utilsr   r   r   Zvllm.configr   rH   r>   rP   rQ   r   r7   r7   r7   r8   <module>   s$   