import paddle
from paddle.distributed import fleet

from .base.topology import ParallelMode
from .meta_parallel import (
    DualPipeVParallel,
    PipelineLayer,
    PipelineParallel,
    PipelineParallelWithInterleave,
    PipelineParallelWithInterleaveFthenB,
    SegmentParallel,
    ShardingParallel,
    TensorParallel,
    VPPFhenBInBalancedMemory,
)

_grad_scalar = None


def distributed_model(model):
    """
    Return a distributed data parallel model (only works in dygraph mode).

    Args:
        model (Layer): the user-defined model which inherits Layer.

    Returns:
        distributed data parallel model which inherits Layer.

    Examples:

        .. code-block:: python

            >>> import paddle
            >>> import paddle.nn as nn
            >>> from paddle.distributed import fleet

            >>> class LinearNet(nn.Layer):
            ...     def __init__(self):
            ...         super().__init__()
            ...         self._linear1 = nn.Linear(10, 10)
            ...         self._linear2 = nn.Linear(10, 1)
            ...     def forward(self, x):
            ...         return self._linear2(self._linear1(x))

            >>> # 1. initialize fleet environment
            >>> fleet.init(is_collective=True)

            >>> # 2. create layer & optimizer
            >>> layer = LinearNet()
            >>> loss_fn = nn.MSELoss()
            >>> adam = paddle.optimizer.Adam(
            ...     learning_rate=0.001, parameters=layer.parameters())

            >>> # 3. get data_parallel model using fleet
            >>> adam = fleet.distributed_optimizer(adam)
            >>> dp_layer = fleet.distributed_model(layer)

            >>> # 4. run layer
            >>> inputs = paddle.randn([10, 10], 'float32')
            >>> outputs = dp_layer(inputs)
            >>> labels = paddle.randn([10, 1], 'float32')
            >>> loss = loss_fn(outputs, labels)
            >>> print("loss:", loss.numpy())
            >>> loss.backward()
            >>> adam.step()
            >>> adam.clear_grad()
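
            >>> # Note: launch this example with multiple processes for the
            >>> # parallel wrappers to take effect, e.g.
            >>> #   python -m paddle.distributed.launch --gpus=0,1 demo.py
            >>> # (``demo.py`` is a placeholder). With a world size of 1,
            >>> # distributed_model returns the model unchanged.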


    Nzmodel should not be Noner   Zuse_pure_fp16Zuse_pure_bf16ZO2ZO1Zfloat16Zbfloat16)modelsZ
optimizerslevelZmaster_weightZ
save_dtypeZdtypeinit_loss_scaling
incr_ratio
decr_ratioincr_every_n_stepsdecr_every_n_nan_or_infuse_dynamic_loss_scaling)r   r   r   r   r   r   )comm_buffer_sizelast_comm_buffer_sizefind_unused_parameters)strategy)r   r   r   groupzDFor pipeline parallel, the model should an instance of PipelineLayerZ
pp_configsaccumulate_steps   zThe accumulate_steps(z/) should be greater than or equal to pp_degree()))r   paddledistributedZget_world_sizeZ_user_defined_strategyampZamp_configsZdecorateZ
GradScaler_grad_scalarZheter_ccl_modeZDataParallelZfuse_grad_size_in_MBZlast_comm_group_size_MBr   Z_hcgZget_parallel_moder   ZSHARDING_PARALLELr   ZDATA_PARALLELZget_data_parallel_groupZSEGMENT_PARALLELr
   ZTENSOR_PARALLELr   ZPIPELINE_PARALLEL
isinstancer   Zhybrid_configsZuse_dualpipevr   Zget_num_virtual_stagesr   Zpipeline_configsZget_pipe_parallel_world_sizer   Zbest_unbalanced_schedulerr   r	   
ValueError)modelZ	fleet_envr   r   r   r   r   r   r   r   distributed_modelr   Z	pp_degree r&   j/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/distributed/fleet/model.pyr%   "   s   3



	.,$"

 r%   )r   Zpaddle.distributedr   Zbase.topologyr   Zmeta_parallelr   r   r   r   r	   r
   r   r   r   r!   r%   r&   r&   r&   r'   <module>   s   ,