o
    )iM                     @   s  d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	m
Z
mZ d dlmZ d dlmZ dedejfdd	Z		d.d
edededededededeee  dee fddZdejdedededee deee ee f fddZ	d/dejdededededee fdd Zdejdedededeee ee f f
d!d"Zd0d$d%Zd1dejfd&d'Zd(ejd)ejd*ee d+edeejejejf f
d,d-ZdS )2    N)ListOptionalSequenceTupleType)AttentionBiasfmha)AttentionBiasSubTensor)AttentionOpBaseshapereturnc                  O   sN   d}t jg | d d || d | d |  R fi |d dd| d S )N         r   )torchrandnZnarrow)r   kwargsZalign_to r   d/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/xformers/attn_bias_utils.py_create_aligned_bias   s   
r   
batch_size	num_headsnum_heads_groupsq_lenkv_lenrequires_gradfmtop	page_sizec                    s.  | d u s	t d | rd S tdtt|| ||	gdddddd }| tju r|	dkr6||9 }d	}|
d ur]t	|
t
jjr]tj|||| f||d
d }|	dv r\|d d df }n*t|||| ||d
}tj |dddd |d	 d  d	 f< |	dv r|d d df }|r|d |	dkr|d d df }|S | t
jju r|  S | t
jju r|  S | t
jju r| |S | t
jju rt|||| ||d
}|	dv r|d d df }|	dkr|d d df }|r|d t
j|S | t
jjt
jjt
jjt
jjt
jjfv r|	dv s	J d }| t
jjt
jjhv rd}n| t
jjkr+|d us'J |d	 }t
jjjt|| |d }| t
jju rD| }| t
jjt
jjhv rtt
jj|j|j |j!d}|d usbJ | t
jju ro|"|}n|#|}| t
jju r|$ }|S | t
jj%t
jj&t
jj't
jj(t
jj)t
jj*fv r+|	dv sJ t+|| \}}t	| t
jj)r| j,n| }| t
jj'u r|j-| |t.|t.|d}n(| t
jj&u r|j-| |t/|t/|d	 t/|t/|d	 d}n|j| |d}t	| t
jj)r)|d usJ  | d	 | }tj01t2|| || |tj3d
4||}|j5||| dS |S | t
jj6t
jj7fv rm|	dv s=J | t
jj7u rFd nd}t|| |\}} | fdd|D g }t
jj7j|||dS t	| t
jj8r|	dv s|J |d usJ  | d	 | }|| }t	| t
jj9rt+|| \}}nt:|||d} fddt2|D }|| fddt;|D }|<|| |  | j,}|j|||d}tj01t2|| || |tj3d
4||}|j5||| || dS | t
jj=kr| dddddS J d|  )N-r      i,  )r   r      r   r"   BMKr   devicedtype)r#   BMHKT)BMGHKr'   )max_q_minus_k)	q_seqinfo	k_seqinfo_batch_sizes)r'   r(   )q_seqlen
kv_padding	kv_seqlenwindow_size)r-   r.   r/   window_leftwindow_right)r-   r.   r/   )block_tablesr   
paged_typec                    s   g | ]
}  d | qS r   randint).0ki)rtotal_kv_lenr   r   
<listcomp>   s    z$create_attn_bias.<locals>.<listcomp>)r-   Zkv_seqstartsr/   Fc                    s   g | ]} d  qS r   r6   )r8   _r   r:   r   r   r<          c                    s(   g | ]\}}|   d |  qS r5   r6   )r8   ir9   )r:   row_sizer   r   r<      s   ( )r3   r   r4   Znotional_padding   )r1   r2   zUnsupported bias type: )>
isinstancerandomRandomjoinmapstrr7   r   Tensor
issubclassr   Ztriton_splitkZFwOpr   r   mathinfZrequires_grad_	attn_biasZLowerTriangularMaskZ"LowerTriangularFromBottomRightMaskZ0LowerTriangularFromBottomRightLocalAttentionMaskZ!LowerTriangularMaskWithTensorBiasZBlockDiagonalMaskZBlockDiagonalCausalMaskZ&BlockDiagonalCausalFromBottomRightMaskZ%BlockDiagonalCausalLocalAttentionMaskZ4BlockDiagonalCausalLocalAttentionFromBottomRightMaskZfrom_seqlens_rand_seqlensZmake_causalr*   r+   r,   Zmake_local_attentionZ%make_local_attention_from_bottomrightZmake_causal_from_bottomrightZBlockDiagonalPaddedKeysMaskZ)BlockDiagonalLocalAttentionPaddedKeysMaskZ/BlockDiagonalCausalLocalAttentionPaddedKeysMaskZ+BlockDiagonalCausalWithOffsetPaddedKeysMaskZ PagedBlockDiagonalPaddedKeysMaskZ0PagedBlockDiagonalCausalWithOffsetPaddedKeysMask_rand_seqlens_padded_kZ_UNPAGED_TYPEZfrom_seqlens_localminmaxtensorsamplerangeint32reshapeZ
make_pagedZ*BlockDiagonalCausalWithOffsetGappyKeysMaskZBlockDiagonalGappyKeysMaskZPagedBlockDiagonalGappyKeysMaskZ/PagedBlockDiagonalCausalWithOffsetGappyKeysMask_rand_maxed_partition	enumerateappendZ!LocalAttentionFromBottomRightMask)Z	bias_typer   r   r   r   r   r%   r&   r   r   r   r   r0   rN   r)   Z
block_diagqkZblock_diag_typeZg_block_diagZpages_per_rowr3   ZstartsZtotal_queriesr   )r   r:   rB   r;   r   create_attn_bias   s   
&
	
	



r]   r:   bsr)   c                 C   s  |dkr
||ks
J ||9 }||9 }g }g }t d|d t d|d g}t d|d t d|d g}t||k rt||k r|du rU| j| }	||	 || j|  nd|t|d }
|t|d }|
|| ksJ d|
d|d|d	|d
|d|d|t||
| }| d|d }	||	 |
| | d }|dksJ |dkr||	| d|  n||	 t||k rt||k s>|t|dd  |d< |t|dd  |d< ||fS )aC  
    Generates lists of lengths of query blocks and corresponding key blocks.
    The total number of queries will be bs * q_len and the
    total number of keys will be bs * kv_len.
    max_q_minus_k: maximum allowed num_queries - num_keys.
        For "bottom-right" masks it's 0, we need to have more keys than
        queries, otherwise some queries have no keys to attend to.
        For BlockDiagonalCausalMask it's None, there is no constraint
        on num_queries - num_keys.
        For BlockDiagonalCausalLocalAttentionMask it's equal
        to the window size.
    r   r   
   r"   Nz
keys_left=z queries_left=z max_q_minus_k=z kv_len=z q_len=z seqlens_k=z seqlens_q=r   )rR   sum	randrangerZ   rQ   )r:   r^   r   r   r)   Z	seqlens_qZ	seqlens_kZstep_qZstep_kZnum_queriesZ	keys_leftZqueries_leftZmax_queries_to_takeZextra_keys_availabler   r   r   rO     s<   

,

$rO   Ttotalnmxpositivec                 C   sd   |r
||8 }|d8 }|  t|| |}tj||tjd}d| |< |d}|r.|d7 }| S )Nr   r&   )rT   rU   r   ZzerosrV   flattenr`   tolist)r:   rb   rc   rd   re   Zidxsyzr   r   r   rX   Q  s   
rX   c                    s\   | krt d| kr g|  }}||fS t|| | } fdd|D }||fS )Nzneed more queries than keysc                    s   g | ]} | qS r   r6   )r8   rA   r?   r   r   r<   s  r@   z*_rand_seqlens_padded_k.<locals>.<listcomp>)
ValueErrorrX   )r:   r^   r   r   Z	q_seqlensZ	k_seqlensr   r?   r   rP   c  s   	rP           c           	         s  j dkr%dtf fddtjfddtjd D ddS j d	kr9|d
ks0J t dS    d urKndjd d   dd } d urt	 t
tfr jjd djd jd fjtjd}n }|j d	krjd |jd |jd  ksJ |dg|jdd  }||  }|d}|d ur||d|   }| S )NrC   groupc                    sH   t  tjjr jr d d | f S  S t  tjr" d d | f S  S N)rD   r   rN   r	   ZHOLDS_DENSE_TENSORr   rJ   )rm   )rN   r   r   attn_bias_groupz  s   z&ref_attention.<locals>.attn_bias_groupc              	      sX   g | ](}t d d d d |f d d d d |f d d d d |f  |dqS )NscalerN   )ref_attention_bmhk)r8   g)ro   r\   r[   rq   vr   r   r<     s    z!ref_attention.<locals>.<listcomp>r"   dim   rl   rp   r   r   g      ?r   r$   )ndimintr   stackrU   r   rr   floatZ	transposerD   r   r	   materializer%   float32rW   Zsoftmax)	r[   r\   rt   rN   Z	drop_maskprq   ZattnZattn_bias_tensorr   )rN   ro   r\   r[   rq   rt   r   ref_attentionw  sF   



"
r   c                 C   s   | j dksJ dd }t|ttfr@|j| jd | jd | jd |jd f| jtjd	| jd | jd  | jd |jd g}t
|| ||||||d}|	| jd | jd | jd |jd	 g}|d
S )Nrw   c                 S   s2   |  d| jd | jd  | jd | jd gS )Nr   r"   r   r   r   r"   r   r   )permuterW   r   )tr   r   r   T  s   
$zref_attention_bmhk.<locals>.Tr   r"   r   r$   )rq   r   r   )ry   rD   r   r	   r}   r   r%   r   r~   rW   r   r   )r[   r\   rt   rN   rq   r   outr   r   r   rr     s   "(*
rr   cache_kcache_v
kv_seqlensBLOCK_Nc                    sP   fdd|D }t |}| j\}}}}	tj|||	| j| jd}
tj|||	| j| jd}d}t|D ]0}| |d|| f  |
||||  < ||d|| f  |||||  < ||| 7 }q2|  d   }tj|dtj	d
d||}t|jddt| jdd	
d  }||  jtj	d
}||

d|
dfS )a  
    Create block tables and pages K/V cache for testing paged attention.
    Args:
        cache_k, cache_v: K/V caches, each of shape [B, MAX_T, H_kv, D].
            Note that these tensors are unexpanded,
            i.e. for multiquery case cache_k.shape[2] = 1
        kv_seqlens: list of K/V sequence lengths
        BLOCK_N: number of tokens per per paged attention block
        B: batch size
    Returns:
        block_tables: [B, MAX_BLOCKS]
        packed_cache_k: [1, total_len_rounded, H_kv, D]
        packed_cache_v: [1, total_len_rounded, H_kv, D]
    where total_len_rounded is a sum of K/V seqlens, each rounded up
    to a multiple of BLOCK_N.
    c                    s    g | ]}|  d      qS r=   r   )r8   xr   r   r   r<     s     z!pack_kv_cache.<locals>.<listcomp>r$   r   Nr   cudaru   )r%   rf   )r`   r   r   emptyr%   r&   rU   cloneZarangerV   Z	unsqueezeexpandrS   Zcumsumto
contiguous)r   r   r   r   Zkv_seqlens_roundedZtotal_len_roundedBZMAX_THDZpacked_cache_kZpacked_cache_vZseqstartbZnum_blocks_per_rowr3   Z	seqstartsr   r   r   pack_kv_cache  sP   r   )NN)T)NNrl   Nrn   )rL   rE   typingr   r   r   r   r   r   Zxformers.opsr   r   Zxformers.ops.fmha.attn_biasr	   Zxformers.ops.fmha.commonr
   rz   rJ   r   boolrI   r]   rF   rO   rX   rP   r   rr   r   r   r   r   r   <module>   s   	


 j
J


9