o
    )i!                     @   sH  d dl Z d dlmZmZ d dlmZ d dlmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZ eeZed	d
G dd dZed	d
G dd deZed	d
G dd deZed	d
G dd deZed	d
G dd deZed	d
G dd deZeG dd dZeG dd dZeG dd dZdS )    N)	dataclassfields)prod)Optional)Self)
VllmConfig)init_logger)cdivget_dtype_sizeT)frozenc                   @   sX   e Zd ZU dZeed< edefddZdedefddZ	e
d	ee defd
dZdS )KVCacheSpeczG
    A base class for specifying the KV cache format of one layer.
    
block_sizereturnc                 C      t )zs
        The size of a page with `block_size` tokens in bytes.

        Returns:
            The page size
        NotImplementedErrorself r   f/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/v1/kv_cache_interface.pypage_size_bytes   s   zKVCacheSpec.page_size_bytesvllm_configc                 C   r   )z
        The maximum possible memory usage of this KV cache in bytes.

        Returns:
            The KV cache size in bytes
        r   r   r   r   r   r   max_memory_usage_bytes&   s   z"KVCacheSpec.max_memory_usage_bytesspecsc                    s4   t  fdd dd D sJ dt d S )zW
        Merge a list of KVCacheSpec objects into a single KVCacheSpec object.
        c                 3   s    | ]	}| d  kV  qdS )r   Nr   .0specr   r   r   	<genexpr>4   s    z$KVCacheSpec.merge.<locals>.<genexpr>   Nz7All layers in the same KV cache group must be the same.r   )allcopydeepcopy)clsr   r   r   r   merge/   s    zKVCacheSpec.mergeN)__name__
__module____qualname____doc__int__annotations__propertyr   r   r   classmethodlistr   r%   r   r   r   r   r      s   
 		r   c                   @   sB   e Zd ZU eed< eed< ejed< eed< edefddZ	dS )	AttentionSpecnum_kv_heads	head_sizedtypeuse_mlar   c                 C   s.   | j rdnd}|| j | j | j t| j S )Nr       )r3   r   r0   r1   r
   r2   )r   Zcoefr   r   r   r   @   s   zAttentionSpec.page_size_bytesN)
r&   r'   r(   r*   r+   torchr2   boolr,   r   r   r   r   r   r/   9   s   
 
r/   c                   @   sz   e Zd ZU dZee ed< dZee ed< 	 dedefddZ	e
dee dee fd	d
Ze
dee defddZdS )FullAttentionSpecNsliding_windowattention_chunk_sizer   r   c                 C   s   |j j}t|| j| j S N)model_configmax_model_lenr	   r   r   )r   r   r<   r   r   r   r   V   s   z(FullAttentionSpec.max_memory_usage_byteswindow_sizesc                 C   s,   t |dkrd S t |dkr| S td)Nr   r    zOAll attention layers in the same KV cache group must have the same window size.)lenpop
ValueError)r$   r=   r   r   r   merge_window_sizesZ   s   z$FullAttentionSpec.merge_window_sizesr   c              
   C   s   t dd |D sJ dtdd |D }tdd |D }| |d j|d j|d j|d j|d j| || |d}|D ]}tt	D ]}t
||jt
||jksYJ dqGqA|jd	u|jd	u d
kskJ d|S )zl
        Merge a list of FullAttentionSpec objects into a single 
        FullAttentionSpec object.
        c                 s   s    | ]}t |tV  qd S r:   )
isinstancer7   r   r   r   r   r   k   s    z*FullAttentionSpec.merge.<locals>.<genexpr>zJAll attention layers in the same KV cache group must be FullAttentionSpec.c                 s        | ]}|j d ur|j V  qd S r:   )r8   r   r   r   r   r   o       
c                 s   rC   r:   )r9   r   r   r   r   r   q   rD   r   )r   r0   r1   r2   r3   r8   r9   zRAll attention layers in the same KV cache group must have the same attention spec.Nr    zZModel with both sliding window layers and chunked local attention layers is not supported.)r!   setr   r0   r1   r2   r3   rA   r   r/   getattrnamer8   r9   )r$   r   r8   r9   Zmerged_specr   fr   r   r   r%   e   s8   	zFullAttentionSpec.merge)r&   r'   r(   r8   r   r*   r+   r9   r   r   r-   rE   rA   r.   r   r%   r   r   r   r   r7   H   s   
 

r7   c                   @   s(   e Zd ZU eed< dedefddZdS )ChunkedLocalAttentionSpecr9   r   r   c                 C   s2   |j j}|jj}t| j| |}t|| j| j S r:   )	r;   r<   scheduler_configmax_num_batched_tokensminr9   r	   r   r   r   r   r<   rK   Z
num_tokensr   r   r   r      s   
z0ChunkedLocalAttentionSpec.max_memory_usage_bytesN)r&   r'   r(   r*   r+   r   r   r   r   r   r   rI      s   
 rI   c                   @   s0   e Zd ZU eed< dd ZdedefddZdS )	SlidingWindowSpecr8   c                 C   s   | j rJ dd S )Nz'MLA is not supported for sliding window)r3   r   r   r   r   __post_init__   s   zSlidingWindowSpec.__post_init__r   r   c                 C   s:   |j j}|jj}t| jd | |}t|| jd | j S )Nr    )	r;   r<   rJ   rK   rL   r8   r	   r   r   rM   r   r   r   r      s   z(SlidingWindowSpec.max_memory_usage_bytesN)r&   r'   r(   r*   r+   rO   r   r   r   r   r   r   rN      s   
 rN   c                   @   st   e Zd ZU eeedf df ed< eej ed< dZe	e ed< dZ
eed< edefd	d
ZdedefddZdS )	MambaSpec.shapesdtypesNpage_size_paddedZmamba2
mamba_typer   c                 C   s>   t dd t| j| jD }| jd ur| j|ksJ | jS |S )Nc                 s   s$    | ]\}}t |t| V  qd S r:   )r   r
   )r   shaper2   r   r   r   r      s
    
z,MambaSpec.page_size_bytes.<locals>.<genexpr>)sumziprQ   rR   rS   )r   Z	page_sizer   r   r   r      s   
zMambaSpec.page_size_bytesr   c                 C   s   | j S r:   )r   r   r   r   r   r      s   z MambaSpec.max_memory_usage_bytes)r&   r'   r(   tupler*   r+   r5   r2   rS   r   rT   strr,   r   r   r   r   r   r   r   rP      s   
 	rP   c                   @   s&   e Zd ZU dZeed< ee ed< dS )KVCacheTensorzP
    A class for specifying how the workers should initialize the KV cache.
    sizeZ	shared_byN)r&   r'   r(   r)   r*   r+   r.   rY   r   r   r   r   rZ      s   
 rZ   c                   @   s&   e Zd ZU dZee ed< eed< dS )KVCacheGroupSpecz
    Represents a group of model layers that share the same KV cache block table.
    These layers are regarded as one layer in the KV cache manager.
    Zlayer_namesZkv_cache_specN)r&   r'   r(   r)   r.   rY   r+   r   r   r   r   r   r\      s   
 r\   c                   @   s8   e Zd ZU dZ	 eed< 	 ee ed< 	 ee ed< dS )KVCacheConfigz0
    The KV cache configuration of a model.
    Z
num_blocksZkv_cache_tensorsZkv_cache_groupsN)	r&   r'   r(   r)   r*   r+   r.   rZ   r\   r   r   r   r   r]      s   
 r]   )r"   dataclassesr   r   mathr   typingr   r5   Ztyping_extensionsr   Zvllm.configr   Zvllm.loggerr   Z
vllm.utilsr	   r
   r&   loggerr   r/   r7   rI   rN   rP   rZ   r\   r]   r   r   r   r   <module>   s8   %@