o
    81 iõ!  ã                   @   sš   d dl Z d dlmZ d dlmZ d dl mZ d dlmZmZ G dd„ dej	ƒZ
G dd„ dej	ƒZG d	d
„ d
ejƒZG dd„ dejƒZG dd„ dej	ƒZdS )é    N)Ú	rearrange)ÚTensor)Ú
all_reduceÚreduce_scatterc                       s0   e Zd Z				d‡ fdd„	Zddd„Z‡  ZS )ÚGPT2EmbeddingsNc           	         s    ||dœ}t ƒ  ¡  |du rtj||fd|i|¤Ž| _d| _ntj||fd|i|¤Ž| _tj||fddi|¤Ž| _|| _| jdkrNtj||fi |¤Ž| _dS dS )zÐ
        If max_position_embeddings <= 0, there's no position embeddings
        If word_embe_proj_dim is not None (e.g., OPT-350m), we embed to that dimension
            the project up to embed_dim
        ©ÚdeviceÚdtypeNÚpadding_idxZbiasFr   )	ÚsuperÚ__init__ÚnnÚ	EmbeddingÚword_embeddingsÚ
project_inZLinearÚmax_position_embeddingsÚposition_embeddings)	ÚselfÚ	embed_dimÚ
vocab_sizer   r
   Zword_embed_proj_dimr   r	   Úfactory_kwargs©Ú	__class__© úh/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/flash_attn/modules/embedding.pyr      sB   

ÿÿÿÿÿÿÿÿÿ
ÿÿÿzGPT2Embeddings.__init__c                 C   sd   |j \}}|  |¡}| jdur|  |¡}| jdkr0|du r'tj|tj|jd}|  |¡}|| }|S )úR
        input_ids: (batch, seqlen)
        position_ids: (batch, seqlen)
        Nr   ©r	   r   )	Úshaper   r   r   ÚtorchÚarangeÚlongr   r   )r   Ú	input_idsÚposition_idsÚ
batch_sizeÚseqlenÚ
embeddingsr   r   r   r   Úforward/   s   





zGPT2Embeddings.forward)NNNN)N©Ú__name__Ú
__module__Ú__qualname__r   r&   Ú__classcell__r   r   r   r   r      s    ø#r   c                       s.   e Zd Z			d‡ fdd„	Zddd„Z‡  ZS )ÚBertEmbeddingsNc           	         s‚   ||dœ}t ƒ  ¡  tj||fd|i|¤Ž| _|| _|| _| jdkr-tj||fi |¤Ž| _| jdkr?tj||fi |¤Ž| _dS dS )z“
        If max_position_embeddings <= 0, there's no position embeddings
        If type_vocab_size <= 0, there's no token type embeddings
        r   r
   r   N)	r   r   r   r   r   r   Útype_vocab_sizer   Útoken_type_embeddings)	r   r   r   r   r-   r
   r   r	   r   r   r   r   r   A   s(   

ÿÿÿ
ÿÿ
ÿzBertEmbeddings.__init__c           	      C   sˆ   |j \}}|  |¡}| jdkr&|du rtj|tj|jd}|  |¡}|| }| jdkrB|du r9tj	|tj|jd}|  
|¡}|| }|S )zz
        input_ids: (batch, seqlen)
        position_ids: (batch, seqlen)
        token_type_ids: (batch, seqlen)
        r   Nr   )r   r   r   r   r   r    r   r   r-   Zzerosr.   )	r   r!   r"   Ztoken_type_idsr#   r$   r%   r   r.   r   r   r   r&   ]   s   





zBertEmbeddings.forward)NNN)NNr'   r   r   r   r   r,   @   s    ør,   c                       s:   e Zd Zdddœ‡ fdd„
Zdedef‡ fdd„Z‡  ZS )	ÚVocabParallelEmbeddingN)Úprocess_groupr
   c                   s€   || _ |d ur+tj |¡}|| dkrtd|› d|› dƒ‚|dkr*|d ur*tdƒ‚nd}tƒ j|| g|¢R d|i|¤Ž d S )Nr   znum_embeddings (ú#) must be divisible by world_size (ú)é   z.ParallelEmbedding does not support padding_idxr
   )r0   r   ÚdistributedÚget_world_sizeÚ
ValueErrorÚRuntimeErrorr   r   )r   Únum_embeddingsr0   r
   ÚargsÚkwargsÚ
world_sizer   r   r   r   s   s   ÿÿ€&zVocabParallelEmbedding.__init__ÚinputÚreturnc                    sx   | j d u rtƒ  |¡S tj | j ¡}| j}|| |d | }}||k ||kB }|| }d||< tƒ  |¡}d||< |S )Nr3   r   g        )r0   r   r&   r   r4   Úget_rankr8   )r   r<   Úrankr   Zvocab_start_indexZvocab_end_indexZinput_ids_maskr%   r   r   r   r&   ‚   s   
zVocabParallelEmbedding.forward)r(   r)   r*   r   r   r&   r+   r   r   r   r   r/   r   s    r/   c                       s"   e Zd Zddœ‡ fdd„
Z‡  ZS )ÚColumnParallelEmbeddingN)r0   c                   sf   || _ |d urtj |¡}|| dkrtd|› d|› dƒ‚nd}tƒ j||| g|¢R i |¤Ž d S )Nr   zembedding_dim (r1   r2   r3   )r0   r   r4   r5   r6   r   r   )r   r8   Úembedding_dimr0   r9   r:   r;   r   r   r   r   “   s   ÿÿÿ$z ColumnParallelEmbedding.__init__)r(   r)   r*   r   r+   r   r   r   r   r@   ’   s    r@   c                       s0   e Zd Z				d‡ fdd„	Zd	dd„Z‡  ZS )
ÚParallelGPT2EmbeddingsNTc	           
         sj   ||dœ}	t ƒ  ¡  || _|| _t||f||dœ|	¤Ž| _|| _| jdkr3t||fd|i|	¤Ž| _dS dS )zQ
        If max_position_embeddings <= 0, there's no position embeddings
        r   )r
   r0   r   r0   N)	r   r   r0   Úsequence_parallelr/   r   r   r@   r   )
r   r   r   r   r0   r
   rC   r   r	   r   r   r   r   r   ¢   s.   

þüû
ÿÿÿÿzParallelGPT2Embeddings.__init__Fc                 C   sÒ   |j \}}tj | j¡}|  |¡}| jdkrO|du r$tj|tj|j	d}|  
|¡}|dkr2|| }n| j
j}	tj | j¡}
|d|
|	 |
d |	 …f  |7  < |rVt|dƒ}| jr[tnt}|dkrc|S ||| jƒS )r   r   Nr   r3   .zb s d -> (b s) d)r   r   r4   r5   r0   r   r   r   r    r   r   rA   r>   r   rC   r   r   )r   r!   r"   Zcombine_batch_seqlen_dimr#   r$   r;   r%   r   Zpartition_dimr?   Z	reduce_fnr   r   r   r&   Á   s(   




ÿþ
zParallelGPT2Embeddings.forward)NTNN)NFr'   r   r   r   r   rB   ¡   s    ÷rB   )r   Ztorch.nnr   Zeinopsr   r   Zflash_attn.utils.distributedr   r   ÚModuler   r,   r   r/   r@   rB   r   r   r   r   Ú<module>   s   52 