o
    )i                     @   sR   d dl mZ d dlmZmZmZ d dlmZ d dlm	Z	 e	e
ZG dd dZdS )    )Optional)CompilationLevelCUDAGraphMode
VllmConfig)BatchDescriptor)init_loggerc                   @   s`   e Zd ZdZdefddZdedefddZd	ed
e	fddZ
dedeeee f fddZdS )CudagraphDispatchera  
    Runtime cudagraph dispatcher to dispach keys for multiple set of cudagraphs.

    The dispatcher stores two sets of dispatch keys, one for PIECEWISE and one
    for FULL cudagraph runtime mode. The keys are initialized depending on 
    attention support and what cudagraph mode is set in CompilationConfig. The 
    keys stored in dispatcher are the only source of truth for valid
    cudagraphs that can be dispatched at runtime.

    At runtime, the dispatch method generates the runtime cudagraph mode (FULL, 
    PIECEWISE, or NONE for no cudagraph) and the valid key (batch descriptor)
    based on the input key. After dispatching (commuicate via forward context), 
    the cudagraph wrappers will trust the dispatch key to do either capturing
    or replaying (if mode matched), or pass through to the underlying runnable 
    without cudagraph (if mode no match or mode is NONE).
    vllm_configc                 C   s~   || _ |j| _| jj| _tjt tjt i| _| j r:| jj	t
jkr(| j s:J d| j d| jj	 d| jj d| _d S )NzxCompilation level should be CompilationLevel.PIECEWISE when cudagraph_mode piecewise cudagraphs is used, cudagraph_mode=z, compilation_level=z, splitting_ops=F)r	   compilation_configcudagraph_moder   	PIECEWISEsetFULLcudagraph_keysZrequires_piecewise_compilationlevelr   Zsplitting_ops_contain_attentionZsplitting_opskeys_initialized)selfr	    r   h/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/v1/cudagraph_dispatcher.py__init__   s&   


zCudagraphDispatcher.__init__runtime_modebatch_descriptorc                 C   s2   |t jt jfv sJ d| | j| | d S )Nz Invalid cudagraph runtime mode: )r   r   r   r   add)r   r   r   r   r   r   add_cudagraph_key4   s   z%CudagraphDispatcher.add_cudagraph_keyr   uniform_decode_query_lenc                    s   |  tjkr| jjD ]}| |  t|dd q| tjkrH|	 rH| j
jj   fdd| jjD }|D ]}| tjt|dd q:d| _d S )NF)Z
num_tokensZuniform_decodec                    s    g | ]}| kr|kr|qS r   r   ).0xZmax_num_tokensr   r   r   
<listcomp>Q   s
    zACudagraphDispatcher.initialize_cudagraph_keys.<locals>.<listcomp>T)Z
mixed_moder   NONEr
   Zcudagraph_capture_sizesr   r   Zdecode_moder   Zseparate_routiner	   Zscheduler_configZmax_num_seqsr   )r   r   r   bsZ"cudagraph_capture_sizes_for_decoder   r   r   initialize_cudagraph_keys:   s*   


z-CudagraphDispatcher.initialize_cudagraph_keysreturnc                 C   sx   | j std tjdfS || jtj v rtj|fS |j}|| jtj v r*tj|fS || jtj v r7tj|fS tjdfS )z
        Given a batch descriptor, dispatch to a cudagraph mode.
        A new batch descriptor is returned as we might dispatch a uniform batch 
        to a graph that supports a more general batch (uniform to non-uniform).
        zJcudagraph dispatching keys are not initialized. No cudagraph will be used.N)	r   loggerZwarning_oncer   r   r   r   Znon_uniformr   )r   r   Znon_uniform_keyr   r   r   dispatch[   s   	





zCudagraphDispatcher.dispatchN)__name__
__module____qualname____doc__r   r   r   r   r   intr!   tupler   r$   r   r   r   r   r      s    

!r   N)typingr   Zvllm.configr   r   r   Zvllm.forward_contextr   Zvllm.loggerr   r%   r#   r   r   r   r   r   <module>   s   