o
    )i                  	   @   s  d dl mZmZ d dlZd dlZdejdejjddfddZdejdejjdeejeejj	 f fd	d
Z
dejdejjdeejeejj	 f fddZdejdejjdejfddZdejdejjdejfddZG dd dejjZdejdejjdejfddZG dd dejjZdejdejjdejfddZG dd dejjZdejdejjdejfddZG dd dejjZdejdejjdejfdd ZdS )!    )OptionalTupleNxprocess_groupreturnc                C   s0   |  }|dkr
d S tjj| tjjj|d d S )N   )Ztensoropgroup)sizetorchdistributed
all_reduceReduceOpSUM)r   r   mp_size r   s/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/xformers/ops/differentiable_collectives.pyr      s   
r   input_c                C   sX   |  }|dkr| d fS | | jd | f| jdd   }tjj|| |dd}||fS )Nr   r   T)Zoutput_tensorZinput_tensorr	   async_op)r
   	new_emptyshaper   r   Zall_gather_into_tensorr   r   r   outputhandler   r   r   gather_along_first_dim_async   s   $r   c                C   sv   |  }|dkr| d fS | jd | dksJ | | jd | f| jdd   }tjj|| tjjj|dd}||fS )Nr   r   T)r   inputr   r	   r   )r
   r   r   r   r   Zreduce_scatter_tensorr   r   r   r   r   r   $reduce_scatter_along_first_dim_async+   s   $r   c                C   $   t | |d\}}|d ur|  |S Nr   )r   waitr   r   r   r   r   r   r   gather_along_first_dim?   s   r"   c                C   r   r   )r   r    r!   r   r   r   reduce_scatter_along_first_dimH   s   
r#   c                   @   P   e Zd ZedejdejjdejfddZedejde	ejdf fdd	Z
dS )
_CopyToModelParallelRegionr   r   r   c                 C   s
   || _ |S Nr   ctxr   r   r   r   r   forwardT   s   z"_CopyToModelParallelRegion.forwardgrad_outputNc                 C   s   t || jd |d fS r   )r   r   r(   r*   r   r   r   backward[   s   z#_CopyToModelParallelRegion.backward__name__
__module____qualname__staticmethodr   Tensorr   ProcessGroupr)   r   r,   r   r   r   r   r%   S       r%   c                 C      t | |S r&   )r%   applyr   r   r   r   r   copy_to_model_parallel_regionc      r8   c                   @   r$   )
_ReduceFromModelParallelRegionr   r   r   c                 C   s   t ||d | | |S r   )r   Z
mark_dirtyr'   r   r   r   r)   j   s   
z&_ReduceFromModelParallelRegion.forwardr*   Nc                 C   s   |d fS r&   r   r+   r   r   r   r,   r   s   z'_ReduceFromModelParallelRegion.backwardr-   r   r   r   r   r:   i   s    r:   c                 C   r5   r&   )r:   r6   r7   r   r   r   !reduce_from_model_parallel_regiony   r9   r;   c                   @   r$   )
!_GatherFromSequenceParallelRegionr   r   r   c                 C      || _ t||dS r   )r   r"   r(   r   r   r   r   r   r)         z)_GatherFromSequenceParallelRegion.forwardr*   Nc                 C      t || jdd fS r   )r#   r   r+   r   r   r   r,      s
   z*_GatherFromSequenceParallelRegion.backwardr-   r   r   r   r   r<      r4   r<   c                 C   r5   r&   )r<   r6   r7   r   r   r   $gather_from_sequence_parallel_region   r9   rA   c                   @   r$   )
 _ScatterToSequenceParallelRegionr   r   r   c                 C   r=   r   )r   r#   r>   r   r   r   r)      r?   z(_ScatterToSequenceParallelRegion.forwardr*   Nc                 C   r@   r   )r"   r   r+   r   r   r   r,      s   z)_ScatterToSequenceParallelRegion.backwardr-   r   r   r   r   rB      r4   rB   c                 C   r5   r&   )rB   r6   r7   r   r   r   #scatter_to_sequence_parallel_region   r9   rC   )typingr   r   r   Ztorch.distributedr2   r   r3   r   ZWorkr   r   r"   r#   ZautogradFunctionr%   r8   r:   r;   r<   rA   rB   rC   r   r   r   r   <module>   s   



	



