o
    0 i                     @   s   d dl mZ d dlZdejdedejfddZ		dd	ejjd
ejdejdejdeej dedee de	ejdf fddZ
dS )    )OptionalNhidden_statesn_repreturnc                 C   s^   | j \}}}}|dkr| S | dddddddddf |||||} | ||| ||S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
       N)shapeexpandZreshape)r   r   batchZnum_key_value_headsslenZhead_dim r   p/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/transformers/integrations/sdpa_paged.py	repeat_kv   s
   0r           modulequerykeyvalueattention_maskdropoutscalingc              	   K   s   | dd }|d ur*|j||| jfi |\}}|ddd}|ddd}t| dr;t|| j}t|| j}|}	| }| }| }t	j
jj||||	||dd}
|
dd }
|
d fS )Ncacher   r   num_key_value_groupsF)Z	attn_maskZ	dropout_pscaleZ	is_causal   )popupdateZ	layer_idxZ	transposeZ	unsqueezehasattrr   r   
contiguoustorchnnZ
functionalZscaled_dot_product_attention)r   r   r   r   r   r   r   kwargsr   Zcausal_maskZattn_outputr   r   r   sdpa_attention_paged_forward   s.   

r!   )r   N)typingr   r   ZTensorintr   r   Modulefloattupler!   r   r   r   r   <module>   s,    	