o
    81 i5                     @   s   d dl Z d dlZd dlmZmZ d dlmZmZ dddZ		ddd	Z		
	 				dddZ
				dddZ										
	 	 				dddZdS )    N)	rearrangerepeat)	pad_inputunpad_inputrandomFc                 C   s   |dv sJ |dkrt j|df| |t jd}n.|dkr2t jt|r"dnd| d | d |df|d}n|d	krEt j| d
 | d |df|d}|r\t|D ]}|d dkrWd||< qKd|d< tt j| |dd|d|k }|S )N)fullr   thirdr      devicedtyper   r      )r   r         zs -> b sb)torchr   int32randintmaxranger   arange)Z
max_seqlen
batch_sizer   modeZzero_lengthslengthsiZpadding_mask r   d/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/flash_attn/utils/testing.pygenerate_random_padding_mask
   s$   "r   c
           #         sN  |r|rJ | j \ }
}|j d }|j \}}}|j  ||fks$J |j  ||fks/J |dus7|	dur?|r;J |r?J |durdt| ||\}}}} fdd}|durat|d nd}n*t| d}tjd d  tj|jd	}d}} fd
d}|durt|dnd}|durt|||	\}}}}t|||	^}}nt|d}t|d}tjd d  tj|jd	}d}}|r||k sJ |
|ksJ tj|||gdd}tj| ||gdd}|dur fdd}n fdd}|	 
 |||	 
 ||fS |rTtj||gdd}tj||gdd}|} |dur1 fdd}!n fdd}!|	 
 |	 
 ||||| 	 
 |	 
 || |!fS |} |durd fdd}"n fdd}"|	 
 |	 
 |	 
 |dur|	 nd||||||| 	 
 |	 
 |	 
 |dur|	 nd|| |"fS )a  
    Arguments:
        q: (batch_size, seqlen_q, nheads, d)
        k: (batch_size, seqlen_k, nheads_k, d)
        v: (batch_size, seqlen_k, nheads_k, d_v)
        query_padding_mask: (batch_size, seqlen), bool
        key_padding_mask: (batch_size, seqlen), bool
    r   Nc                       t |  S Nr   Zoutput_unpadr   	indices_qseqlen_qr   r   <lambda>;   s    zgenerate_qkv.<locals>.<lambda>zb s ... -> (b s) ...zb s h d -> (b s) h dr   r	   )stepr   r   c                       t | d dS Nz(b s) h d -> b s h dr   r   r#   r   r   r   r'   F       dim   c                    r    r!   r"   Z
dqkv_unpadr$   r   r   r'   _       c                    r)   Nz(b s) t h d -> b s t h dr   r+   r1   r,   r   r   r'   a   r-   c                    r    r!   r"   Z	dkv_unpadr   	indices_kseqlen_kr   r   r'   q   r2   c                    r)   r3   r+   r4   r,   r   r   r'   s   r-   c                    r    r!   r"   Zdk_unpadr5   r   r   r'      r2   c                    r)   r*   r+   r8   r,   r   r   r'      r2   )shaper   r   r   r   r   r   allstackdetachZrequires_grad_)#qkvquery_padding_maskkey_padding_maskqvZkvpackedZ	qkvpackedZquery_unused_maskZkey_unused_maskZnheadsdZd_v_Znheads_kZq_unpadZcu_seqlens_qZmax_seqlen_qZ	seqused_qZoutput_pad_fnZqv_unpadZk_unpadZcu_seqlens_kZmax_seqlen_kZ	seqused_kZv_unpadrestZ	qkv_unpadZqkvZdqkv_pad_fnZkv_unpadkvZ	dq_pad_fnZ
dkv_pad_fnZ	dk_pad_fnr   )r   r6   r%   r7   r&   r   generate_qkv!   s   

















rG   NNc              	   C   s"  t tj| |tjdd}tj||tjd}	|d ur3t |d}t|	d|jd d}	t|	|k|	| d}	|d u r9|nt |dd}
|d u rG| nt |dd}|d d u ra|	||
 | |d	  kS |d u rkt|	|n|
}
t	|	t
||
 | |d	  |
kt|	||
 | |d  k |	|kS )
Nr
   s -> s 1b -> b 1 1 1s -> b 1 1 sr   r           r   r	   )r   r   r   longr   r9   wheresum	full_like
logical_orminimumlogical_and)r&   r7   window_sizesink_token_lengthr@   rA   key_leftpadr   row_idxcol_idxsksqr   r   r   construct_local_mask   s*   

"r[   c                 C   s   t tj| |tjdd}tj||tjd}|d ur3t |d}t|d|jd d}t||k|| d}|d u r9|nt |dd}	|d u rG| nt |dd}
|d u rYt||n|	}	||	 |
 ||	 |
 |  }t	||k ||| kS )	Nr
   rI   rJ   rK   r   r   rL   r   )
r   r   r   rM   r   r9   rN   rO   rP   rQ   )r&   r7   attention_chunkr@   rA   rV   r   rW   rX   rY   rZ   Zcol_limit_left_chunkr   r   r   construct_chunk_mask   s&   	
r]           Tc           "   
   C   s  |	r|d df}| j }|r%|  | | } }}|
dur#|
 nd}
|durRt|d| jd |jd  d}|  | | j } |
durP|
 | |
j nd}
|durd| t|d j|j d}|durv| t|d j|j d}| jd |jd }}t|d	| jd |jd  d}t|d	| jd |jd  d}| jd
 }|jd
 }dt|
du r|n||  }|st	d| | |}n	t	d| || }|
dur|t	d|
| | }|dkrt
|| | }|dur|t| dtd d}|d dus	|d durt|||||||| jd}|dkr4t||||||| jd}|dur2t||n|}|durA||td |durJ|| }tj|d
d|j }|durd|t| dd}|durs|t| dd}|dur|tj|d
ddd}dd|  }|dur|| d} n|} |dur| || j } t	d| || }!|dur|!t| dd |!j|d|j|dfS )a  
    Arguments:
        q: (batch_size, seqlen_q, nheads, head_dim)
        k: (batch_size, seqlen_k, nheads, head_dim)
        v: (batch_size, seqlen_k, nheads, head_dim_v)
        qv: (batch_size, seqlen_q, nheads, head_dim_v)
        query_padding_mask: (batch_size, seqlen_q)
        key_padding_mask: (batch_size, seqlen_k)
        attn_bias: broadcastable to (batch_size, nheads, seqlen_q, seqlen_k)
        dropout_p: float
        dropout_mask: (batch_size, nheads, seqlen_q, seqlen_k)
        causal: whether to apply causal masking
        upcast: whether to cast all inputs to fp32, do all computation in fp32, then cast
            output back to fp16/bf16.
        reorder_ops: whether to change the order of operations (scaling k instead of scaling k, etc.)
            without changing the math. This is to estimate the numerical error from operation
            reordering.
    Output:
        output: (batch_size, seqlen_q, nheads, head_dim_v)
        attention: (batch_size, nheads, seqlen_q, seqlen_k), softmax after dropout
    r   Nzb h -> b 1 (h g) 1r0   )gzb h -> b 1 h 1)r   r	   zb s h d -> b s (h g) dr   g      ?zbthd,bshd->bhtszb s -> b 1 1 sz-inf)rV   r   r.   zb s -> b 1 s 1r^   T)r/   Zkeepdimzbhts,bshd->bthdzb s -> b s 1 1)r   floatr   r9   tor   mathsqrtr   ZeinsumtanhZmasked_fill_r[   r   r]   rQ   ZsoftmaxZmasked_fillr:   )"r=   r>   r?   r@   rA   rV   Z	attn_biasZ	dropout_pZdropout_maskZcausalrB   Z	q_descaleZ	k_descaleZ	v_descalerT   r\   rU   ZsoftcapZupcastZreorder_opsZintermediate_dtypeZdtype_ogr&   r7   rC   ZdvZsoftmax_scaleZscoresZ
local_maskZ
chunk_maskZ	attentionZdropout_scalingZattention_dropoutputr   r   r   attention_ref   s   * 



	







rf   )r   F)NNNFFNN)rH   r   NNNN)NNNN)NNNNr^   NFNNNNrH   r   r   r^   TFN)rb   r   Zeinopsr   r   Zflash_attn.bert_paddingr   r   r   rG   r[   r]   rf   r   r   r   r   <module>   sL   

 
(
%