# transformers/integrations/eager_paged.py (reconstructed source)
from typing import Optional

import torch
from torch import nn


def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    """
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    """
    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)


def eager_paged_attention_forward(
    module: nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    scaling: float,
    **kwargs,
):
    # Update the paged cache if one was passed in, then lay the returned key/value
    # states out as (1, num_heads, seq_len, head_dim).
    cache = kwargs.pop("cache", None)
    if cache is not None:
        key, value = cache.update(key, value, module.layer_idx, **kwargs)
        key = key.transpose(1, 2).unsqueeze(0)
        value = value.transpose(1, 2).unsqueeze(0)

    # Expand grouped key/value heads so they match the number of query heads.
    if hasattr(module, "num_key_value_groups"):
        key = repeat_kv(key, module.num_key_value_groups)
        value = repeat_kv(value, module.num_key_value_groups)

    # A dict mask carries one mask per layer type (full vs. sliding-window attention).
    if isinstance(attention_mask, dict):
        sliding_window = getattr(module, "sliding_window", 0)
        layer_type = "full_attention" if sliding_window == 0 or sliding_window is None else "sliding_attention"
        causal_mask = attention_mask[layer_type]
    else:
        causal_mask = attention_mask

    attn_weights = torch.matmul(query, key.transpose(2, 3)) * scaling
    if causal_mask is not None:
        attn_weights = attn_weights + causal_mask

    if hasattr(module, "sinks"):
        # Attention sinks: append per-head sink logits, subtract the row max for numerical
        # stability, softmax, then drop the sink column from the probabilities.
        sinks = module.sinks.reshape(1, -1, 1, 1).expand(query.shape[0], -1, query.shape[-2], -1)
        combined_logits = torch.cat([attn_weights, sinks], dim=-1)
        combined_logits = combined_logits - combined_logits.max(dim=-1, keepdim=True).values
        probs = nn.functional.softmax(combined_logits, dim=-1, dtype=torch.float32).to(query.dtype)
        attn_weights = probs[..., :-1]
    else:
        attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)

    attn_output = torch.matmul(attn_weights, value)
    attn_output = attn_output.transpose(1, 2).contiguous()

    return attn_output, attn_weights
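

# A minimal, hypothetical usage sketch (not part of the original module): it calls
# eager_paged_attention_forward without a paged cache, using a stand-in module that only
# provides num_key_value_groups. The DummyAttention class and all tensor shapes below are
# illustrative assumptions, not transformers API.
if __name__ == "__main__":
    class DummyAttention(nn.Module):
        def __init__(self):
            super().__init__()
            # 8 query heads sharing 2 key/value heads -> 4 query heads per KV head
            self.num_key_value_groups = 4

    batch, q_heads, kv_heads, seq_len, head_dim = 1, 8, 2, 16, 64
    query = torch.randn(batch, q_heads, seq_len, head_dim)
    key = torch.randn(batch, kv_heads, seq_len, head_dim)
    value = torch.randn(batch, kv_heads, seq_len, head_dim)

    out, weights = eager_paged_attention_forward(
        DummyAttention(), query, key, value, attention_mask=None, scaling=head_dim**-0.5
    )
    print(out.shape)      # torch.Size([1, 16, 8, 64]) -- (batch, seq_len, q_heads, head_dim)
    print(weights.shape)  # torch.Size([1, 8, 16, 16]) -- (batch, q_heads, seq_len, seq_len)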