o
    81 ic                     @   s  d dl Z d dlZd dlmZ d dlm  mZ ejjdd Z	ejjdd Z
G dd dejjZejZejjdd	 Zejjd
d ZG dd dejjZejZejjdd Zejjdd Zejjdd ZdZdZejjeZejjjeddZG dd dejjZejZ dS )    Nc                 C   s<   ||  }|d dt d| dd| |     j| jdS N      ?g      ? e3E?   Hm?dtypetorchtanhtor   )ybiasx r   f/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/flash_attn/ops/activations.py	bias_gelu   s   4r   c                 C   s|   || }t d| dd| |   }d| d||  dd| |    dd|   }||  }|j|jd|jd|jdfS )	z5Assume that y has shape (B, D) and bias has shape (D)r   r   r   r   6vf?r   r   )dimr   )r
   r   r   r   sum)gr   r   r   tanh_outffZgrad_yr   r   r   bias_gelu_back   s   $r   c                   @   $   e Zd Zedd Zedd ZdS )GeLUFunctionc                 C      |  || t||S N)save_for_backwardr   )ctxinputr   r   r   r   forward&   s   
zGeLUFunction.forwardc                 C   s   | j \}}t|||}||fS r   )saved_tensorsr   )r   grad_outputr    r   tmpr   r   r   backward,   s   
zGeLUFunction.backwardN__name__
__module____qualname__staticmethodr!   r%   r   r   r   r   r   %   
    
r   c                 C   s4   | d dt d|  dd|  |      j| jdS r   r	   )r   r   r   r   gelu_fwd8   s   4r,   c                 C   s`   t d| dd| |   }d| d||  dd| |    dd|   }||  j|jdS )Nr   r   r   r   r   r   r	   )r   r   r   r   r   r   r   gelu_bwd@   s
   $r-   c                   @   r   )FastGeLUFunctionc                 C   s   |  | t|S r   )r   r,   )r   r    r   r   r   r!   K   s   
zFastGeLUFunction.forwardc                 C   s   | j \}t||}|S r   )r"   r-   )r   r#   r    r$   r   r   r   r%   Q   s   
zFastGeLUFunction.backwardNr&   r   r   r   r   r.   J   r+   r.   c                 C   s   t |dk| dj|jdS )Nr   g        r   )r
   wherer   r   r   r   r   r   r   relu_bwd[      r1   c                 C   s   t | }|| j| jdS )Nr   FZrelur   r   )r   rr   r   r   
sqrelu_fwd`   s   
r6   c                 C   s   d|  t | j|jdS )Ng       @r   r3   r0   r   r   r   
sqrelu_bwdf   r2   r7   zn
template <typename T> T swiglu_fwd(T x, T y) {
    return float(x) * float(y) / (1.0f + ::exp(-float(x)));
}
z
template <typename T> void swiglu_bwd(T x, T y, T g, T& dx, T& dy) {
    float x_sigmoid = 1.0f / (1.0f + ::exp(-float(x)));
    dx = x_sigmoid * (1 + float(x) * (1.0f - x_sigmoid)) * float(g) * float(y);
    dy = float(x) * x_sigmoid * float(g);
}
   )Znum_outputsc                   @   r   )SwiGLUFunctionc                 C   r   r   )r   
swiglu_fwd)r   r   r   r   r   r   r!   }   s   
zSwiGLUFunction.forwardc                 C   s   | j \}}t|||S r   )r"   
swiglu_bwd)r   Zdoutr   r   r   r   r   r%      s   
zSwiGLUFunction.backwardNr&   r   r   r   r   r9   {   s
    
r9   )!mathr
   Ztorch.nnnnZtorch.nn.functionalZ
functionalr4   Zjitscriptr   r   ZautogradFunctionr   applyZbias_gelu_implr,   r-   r.   Zfast_gelu_implr1   r6   r7   Zswiglu_fwd_codestringZswiglu_bwd_codestringcudaZ	jiteratorZ_create_jit_fnr:   Z_create_multi_output_jit_fnr;   r9   Zswiglur   r   r   r   <module>   s8   	



	



