o
    )i                     @   s^  d dl mZ d dlZd dlm  mZ d dlmZ	 d dlmZ d dl
mZmZmZ dZedZeeg dejgdgd	ZG d
d dejZG dd deZG dd deZG dd dejjZG dd deZG dd dZG dd deZG dd deZG dd deZG dd deZeeeeeeedZedd eeed! ed"d d eeed# dS )$    )TupleN)nn)benchmark_main_helper2	DTYPE2STRproduct_dictg      ?cuda))i @        r   )r      i <  r
   )        `  r   )r   r   r   r   )   r   r   r   )i  r   r   r   )i 0     r	   r   F)B_in_hidden_out_ftdtypebiasc                       sP   e Zd ZejZdeeeeef dededdf fddZ	dd	 Z
d
d Z  ZS )Mlpr   r   bwreturnNc           	         s   |\}}}}t    d| _t|  d| d| d| d| d|r#dnd | _| j|||d| _t | _	| j|||d| _
tj||gd|d	| _tj||gd|d
d| _| j| _| d| d S )Nmlp (,) b )r   r   )devicer   Tr   r   Zrequires_grad)super__init__labelr   	sub_label
LINEAR_CLSfc1r   ZGELUactfc2torchrandngradinputoutto	selfr   r   r   r   BZin_ftZhid_ftZout_ft	__class__ n/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/xformers/benchmarks/benchmark_sp24.pyr   .   s   
0
zMlp.__init__c                 C   s.   | j }| |}| |}| |}|| _d S N)r)   r#   r$   r%   r*   r-   xr1   r1   r2   fwA   s
   



zMlp.fwc                 C   s   | j j| jdd d S )NT)Zretain_graph)r*   Zbackwardr(   r-   r1   r1   r2   r   H   s   zMlp.bw)__name__
__module____qualname__r   Linearr"   r   intboolr   r6   r   __classcell__r1   r1   r/   r2   r   +   s    r   c                   @      e Zd Zdd ZdS )MlpDenseMaskc                 C   sD   | j }| |}tjj|}|| }| |}| |}|| _d S r3   )	r)   r#   r&   opsZxformersZsparse24_largest_mask_2dr$   r%   r*   )r-   r5   maskr1   r1   r2   r6   M   s   



zMlpDenseMask.fwNr8   r9   r:   r6   r1   r1   r1   r2   r@   L       r@   c                   @   r?   )MlpAct24c                 C   s8   | j }| |}t|}| |}| |}|| _d S r3   )r)   r#   xops
sparsify24r$   r%   r*   r4   r1   r1   r2   r6   Z   s   




zMlpAct24.fwNrC   r1   r1   r1   r2   rE   Y   rD   rE   c                   @   s"   e Zd ZdejdejfddZdS )	LinearW24r)   r   c                 C   s"   t j| jddd}t||| jS )NZ24denseZ
cusparselt)Zgradientbackend)rF   rG   weightFZlinearr   )r-   r)   Zw_sparser1   r1   r2   forwardf   s   zLinearW24.forwardN)r8   r9   r:   r&   TensorrL   r1   r1   r1   r2   rH   e   s    rH   c                   @   s   e Zd ZeZdS )MlpW24N)r8   r9   r:   rH   r"   r1   r1   r1   r2   rN   o   s    rN   c                       sD   e Zd Zdeeeeef dededdf fddZd
dd	Z  ZS )MicrobenchmarkBaser   r   r   r   Nc           	         s   |\}}}}t    d| _t|  d| d| d| d| d|r#dnd | _tj||gd|dd	| _| j 	  | _
t| j| _d S )
Nr   r   r   r   r   r   r   Tr   )r   r   r    r   r!   r&   r'   r)   t
contiguousinput_colMajorrF   rG   input_spr,   r/   r1   r2   r   t   s   
0zMicrobenchmarkBase.__init__c                 C   s   d S r3   r1   r7   r1   r1   r2   r      s   zMicrobenchmarkBase.bw)r   N)	r8   r9   r:   r   r<   r=   r   r   r>   r1   r1   r/   r2   rO   s   s    rO   c                   @      e Zd ZdejfddZdS )MicrobenchmarkSparsify24r   c                 C   s   t | j | jS r3   )rF   rG   r)   r7   r1   r1   r2   r6      s   zMicrobenchmarkSparsify24.fwNr8   r9   r:   r&   rM   r6   r1   r1   r1   r2   rU          rU   c                   @   rT   )MicrobenchmarkSp24ApplyDenser   c                 C   s   t j| j| jdd | jS NT)patternZ	out_dense)rF   sparsify24_liker)   rS   r7   r1   r1   r2   r6         zMicrobenchmarkSp24ApplyDense.fwNrV   r1   r1   r1   r2   rX      rW   rX   c                   @   rT   )MicrobenchmarkSp24ApplyDenseTr   c                 C   s   t j| j| jdd | jS rY   )rF   r[   rR   rS   r)   r7   r1   r1   r2   r6      r\   z MicrobenchmarkSp24ApplyDenseT.fwNrV   r1   r1   r1   r2   r]      rW   r]   c                   @   rT   )MicrobenchmarkInputCloner   c                 C   s   | j   | j S r3   )r)   cloner7   r1   r1   r2   r6      s   
zMicrobenchmarkInputClone.fwNrV   r1   r1   r1   r2   r^      rW   r^   )Zact24ZdenseZw24Zs24_inp_sparsify24Zs24_inp_apply_denseZs24_inp_apply_dense_tZs24_inp_cloneZsp24_fwT)r6   cases	functionsmin_run_timeZ	sp24_fwbw)r6   r   r`   ra   rb   ) typingr   r&   Ztorch.nn.functionalr   Z
functionalrK   Zxformers.opsrA   rF   utilsr   r   r   rb   r   listZhalfZCASESModuler   r@   rE   r;   rH   rN   rO   rU   rX   r]   r^   ra   r1   r1   r1   r2   <module>   sX   
!
	

