o
    )iY                     @   st   d dl mZmZmZ d dlZd dlmZmZ d dlm	Z	m
Z
mZ d dlmZ G dd de	ZG dd	 d	e
e ZdS )
    )ListOptionalTypeN)AttentionTypeis_quantized_kv_cache)MLACommonBackendMLACommonImplMLACommonMetadata)decode_attention_fwdc                   @   s4   e Zd ZedefddZeded fddZdS )TritonMLABackendreturnc                   C   s   dS )NZ
TRITON_MLA r   r   r   n/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/attention/backends/triton_mla.pyget_name      zTritonMLABackend.get_nameTritonMLAImplc                   C   s   t S )N)r   r   r   r   r   get_impl_cls   r   zTritonMLABackend.get_impl_clsN)__name__
__module____qualname__staticmethodstrr   r   r   r   r   r   r   r      s
    r   c                       s   e Zd Zdededededeee  dee dedee d	ed
ee ddf fddZde	j
de	j
de	j
dede	j
f
ddZ  ZS )r   	num_heads	head_sizescalenum_kv_headsalibi_slopessliding_windowkv_cache_dtypelogits_soft_cap	attn_typekv_sharing_target_layer_namer   Nc                    sh   t  j|||||||||	|
f
i | |||g}t|r td|	tjkr)tdt| jr2tdd S )NzbTritonMLAImpl does not support one of the following: alibi_slopes, sliding_window, logits_soft_capz`Encoder self-attention and encoder/decoder cross-attention are not implemented for TritonMLAImplz-TritonMLA with FP8 KV cache not yet supported)super__init__anyNotImplementedErrorr   ZDECODERr   r   )selfr   r   r   r   r   r   r   r   r    r!   Zmla_argsZunsupported_features	__class__r   r   r#      s&   


zTritonMLAImpl.__init__q_nopeq_pekv_c_and_k_pe_cacheattn_metadatac                 C   s   |  dksJ |j}|d usJ |jd }tj||gdd}tj|| j| j|j|j	d}d}	tj
|| j|	| jd ftj|j	d}
|d}|dd | jf }|d}t|||||j|j|
|	| j|
 | |S )	Nr   )dim)dtypedevice         .)ZnumelZdecode_metadatashapetorchcatZzerosr   Zkv_lora_rankr/   r0   emptyZfloat32Z	unsqueezesizer
   Zblock_tablesZseq_lens_tensorr   Z
_v_up_proj)r&   r)   r*   r+   r,   Zdecode_metaBqoZnum_kv_splitsZattn_logitsZ
kv_c_cacheZ	PAGE_SIZEr   r   r   _forward_decode@   s<   




zTritonMLAImpl._forward_decode)r   r   r   intfloatr   r   r   r#   r5   ZTensorr	   r<   __classcell__r   r   r'   r   r      sF    
	
#r   )typingr   r   r   r5   Z vllm.attention.backends.abstractr   r   Z"vllm.attention.backends.mla.commonr   r   r	   Z*vllm.attention.ops.triton_decode_attentionr
   r   r   r   r   r   r   <module>   s   