o
    )i4                     @   s   d dl mZ d dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZ d dlZd dlmZmZmZmZ d dlmZ d dlmZ erFd d	lmZ d d
lmZ G dd deZeG dd deZG dd dee ZG dd deZdS )    )defaultdict)	dataclass)
accumulate)TYPE_CHECKINGDictListOptionalTupleTypeN)AttentionBackendAttentionImplAttentionMetadataAttentionMetadataBuilderCommonAttentionState)MultiModalPlaceholderMap)ModelInputForGPUBuilder)async_tensor_h2dc                   @   s   e Zd ZdZedefddZeded fddZeded fd	d
Z	eded fddZ
eded fddZedededededeedf f
ddZedejdejdejddfddZedeej dejddfd d!ZdS )"PlaceholderAttentionBackendz4Placeholder backend for when no attention is needed.returnc                   C      dS )NZNO_ATTENTION r   r   r   t/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/vllm/attention/backends/placeholder_attn.pyget_name      z$PlaceholderAttentionBackend.get_namePlaceholderAttentionImplc                   C      t S N)r   r   r   r   r   get_impl_cls    r   z(PlaceholderAttentionBackend.get_impl_cls#PlaceholderAttentionMetadataBuilderc                   C   r   r   )r   r   r   r   r   get_builder_cls$   r   z+PlaceholderAttentionBackend.get_builder_clsPlaceholderAttentionMetadatac                   C   r   r   )r!   r   r   r   r   get_metadata_cls(   r   z,PlaceholderAttentionBackend.get_metadata_clsr   c                   C   r   r   r   r   r   r   r   get_state_cls,   r   z)PlaceholderAttentionBackend.get_state_cls
num_blocks
block_sizenum_kv_heads	head_size.c                 C   r   )N)   r(   r(   r(   r(   r   )r$   r%   r&   r'   r   r   r   get_kv_cache_shape0   s   z.PlaceholderAttentionBackend.get_kv_cache_shapesrc_kv_cachedst_kv_cache
src_to_dstNc                 C      d S r   r   )r*   r+   r,   r   r   r   swap_blocks9   s   z'PlaceholderAttentionBackend.swap_blocks	kv_cachessrc_to_distsc                 C   r-   r   r   )r/   r0   r   r   r   copy_blocksA   s   z'PlaceholderAttentionBackend.copy_blocks)__name__
__module____qualname____doc__staticmethodstrr   r
   r   r    r"   r#   intr	   r)   torchTensorr.   r   r1   r   r   r   r   r      sT    
r   c                   @   s   e Zd ZU dZeee  ed< eej	 ed< eed< eed< eej	 ed< e
ed< ee ed< ee ed	< d
Zeej	 ed< d
Zeej	 ed< d
Zeej	 ed< d
Zed  ed< d
Zed  ed< eded  fddZeded  fddZd
S )r!   z;Attention metadata for prefill and decode batched together.seq_lensseq_lens_tensormax_prefill_seq_lenmax_decode_seq_lencontext_lens_tensoruse_cuda_graphmax_query_lenmax_decode_query_lenNquery_start_locseq_start_locblock_tables_cached_prefill_metadata_cached_decode_metadatar   c                 C   sT  | j dkrd S | jd ur| jS | jd u rd n	| jd | j d  }| jd u r'd n| jd | j  }| jd u r6d n| jd | j  }| jd u rEd n	| jd | j d  }| jd u rVd n| jd | j  }td}td}t	di d| j d| j
ddd|d| jd| jd	|d
|ddd| jd| jddd|d|d|d|dd| _| jS )Nr   r(   num_prefillsnum_prefill_tokensnum_decode_tokensslot_mapping"multi_modal_placeholder_index_mapsenable_kv_scales_calculationr;   r<   rB   rA   r=   r>   rC   rD   r?   rE   r@   Fr   )rH   rF   rC   r;   r<   rD   r?   r9   emptyr!   rI   rL   rM   rA   r=   )selfrC   r;   r<   rD   r?   rK   rE   r   r   r   prefill_metadatav   st   



	
z-PlaceholderAttentionMetadata.prefill_metadatac                 C   s@  | j dkrd S | jd ur| jS | jd usJ td}td}| jd u r'd n| j| jd  }tdi ddddd| j d|dd ddd	d d
|d| jdd ddd| jd| j	d urm| j	| jd  | j	| j  nd d| j
d ur}| j
| jd  nd dd d|d| j| _| jS dd d|d| j| _| jS )Nr   rH   rI   rJ   rK   rL   rM   Tr;   r<   rB   rA   r=   r>   rC   rD   r?   rE   r@   r   )rJ   rG   r<   r9   rN   rH   r!   rB   r>   rC   rD   r@   )rO   rK   rE   r<   r   r   r   decode_metadata   s|   



	



z,PlaceholderAttentionMetadata.decode_metadata)r2   r3   r4   r5   r   r   r8   __annotations__r9   r:   boolrC   rD   rE   rF   rG   propertyrP   rQ   r   r   r   r   r!   I   s&   
 -r!   c                   @   sR   e Zd ZdddZdd Zddd	efd
dZdee dee dedefddZ	dS )r   input_builderr   c                 C   s   || _ |j| _d S r   )rU   runner)rO   rU   r   r   r   __init__   s   z,PlaceholderAttentionMetadataBuilder.__init__c                 C   s2   g | _ g | _g | _tt| _d| _d| _d| _d S )Nr   )	prefill_seq_lenscontext_lenscurr_seq_lensr   r   multimodal_placeholder_mapsrH   rI   rJ   )rO   r   r   r   prepare   s   
z+PlaceholderAttentionMetadataBuilder.prepare
inter_dataz,ModelInputForGPUBuilder.InterDataForSeqGroupchunked_prefill_enabledc                 C   s   |j }t|jdd |jD |j|j|j|j|jD ]I\}}}}}}	}
| j	|	 |rU|j
}|r@| D ]\}}| j| | q3|  jd7  _|  j|7  _| j	| q|  j|7  _| j	| qdS )zdAdd a sequence group to the metadata. Specifically update/append
        1. context length.
        c                 S   s   g | ]}t |qS r   )len).0tr   r   r   
<listcomp>   s    zFPlaceholderAttentionMetadataBuilder._add_seq_group.<locals>.<listcomp>r(   N)	is_promptzipZseq_idsZinput_tokensZorig_seq_lensr;   
query_lensrY   Zcurr_sliding_window_blocksappendZmulti_modal_placeholder_mapsitemsr[   extendrH   rI   rX   rJ   rZ   )rO   r]   r^   rc   Zseq_idZ	token_lenZseq_lenZcurr_seq_lenZ	query_lenZcontext_lenZcurr_sliding_window_blockZmm_mapsmodalityZplaceholdersr   r   r   _add_seq_group   s.   
z2PlaceholderAttentionMetadataBuilder._add_seq_groupr;   re   cuda_graph_pad_size
batch_sizec                 C   s  t | jdr| jjD ]
}| || jj q
| jj}|dk}t|}|| jd }	t	|	dkr3t|	}
nd}
t| j
dd}t| jdd}| j}tt|dd}tt|dd}|r]|| j }|dkshJ d||dusnJ t| jtj|| jj}t|tj|| jj}t|tj|| jj}t|tj|| jj}d	d
 | j D }td}td}tdi d| jd|d|ddd| jd|d|d|d|d|
d|d|d|d|d|d|d|S )a  Build attention metadata with on-device tensors.

        Args:
            seq_lens: The maybe padded sequence lengths of the input sequences.
            query_lens: The query lengths of the input sequences.
            cuda_graph_pad_size: The padding size for cuda graph.
                                 -1 if cuda graph is not used.
            batch_size: The maybe padded batch size.
        inter_data_listNr   r(   )default)initialzquery_lens: {}c                 S   s   i | ]	\}}||  qS r   )Z	index_map)r`   ri   Zplaceholder_mapr   r   r   
<dictcomp>/  s    z=PlaceholderAttentionMetadataBuilder.build.<locals>.<dictcomp>rH   rK   rL   rM   TrI   rJ   r;   r<   rA   rB   r=   r>   rC   rD   r?   rE   r@   r   )hasattrrU   rm   rj   r^   rV   devicemaxrH   r_   rX   rZ   rJ   listr   rI   formatr   rY   r9   r8   Z
pin_memoryZint32r[   rg   rN   r!   )rO   r;   re   rk   rl   r]   rs   Zuse_captured_graphrA   Zdecode_query_lensrB   r=   r>   rJ   rC   rD   r?   r<   Zquery_start_loc_tensorZseq_start_loc_tensorZplaceholder_index_mapsZslot_mapping_tensorrE   r   r   r   build   s   





	
z)PlaceholderAttentionMetadataBuilder.buildN)rU   r   )
r2   r3   r4   rW   r\   rS   rj   r   r8   rw   r   r   r   r   r      s    

r   c                   @   s&   e Zd ZdddZdejfddZdS )r   r   Nc                 O   r-   r   r   rO   argskwargsr   r   r   rW   P     z!PlaceholderAttentionImpl.__init__c                 O   s   t r   )NotImplementedErrorrx   r   r   r   forwardS  r{   z PlaceholderAttentionImpl.forward)r   N)r2   r3   r4   rW   r9   r:   r}   r   r   r   r   r   N  s    
r   )collectionsr   dataclassesr   	itertoolsr   typingr   r   r   r   r	   r
   r9   Z vllm.attention.backends.abstractr   r   r   r   Zvllm.attention.backends.utilsr   Zvllm.multimodalr   Zvllm.worker.model_runnerr   Z
vllm.utilsr   r   r!   r   r   r   r   r   r   <module>   s(    0 
 