o
    pi"                     @   s^   d dl mZmZ d dlmZ d dlmZmZmZ dd Z	dd Z
dd	d
Zdd Zdd ZdS )    )_C_ops_legacy_C_ops)check_variable_and_dtype)LayerHelperin_dynamic_modein_dynamic_or_pir_modec                 C   sX   t  r	t| |S d}t|fi t }|j| jd}|j|d| id|id|id |S )aO  
    calculate the expert count according to the gate index.
    Args:
        numbers (Tensor): Tensor. The input gate index whose data type should be int32 or int64.
        upper_range (int): The number of the experts.
    Returns:
        out (Tensor): The output expert count.
    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
            >>> import paddle
            >>> from paddle.distributed.models.moe import utils
            >>> numbers = [[0, 2], [0, 2]]
            >>> upper_range = 6
            >>> numbers = paddle.to_tensor(numbers, dtype="int64")
            >>> number_count = utils._number_count(numbers, upper_range)
            >>> print(number_count)
            Tensor(shape=[6], dtype=int64, place=Place(gpu:0), stop_gradient=True,
            [2, 0, 2, 0, 0, 0])
    number_countdtypenumbersOutupper_rangetypeinputsoutputsattrs)r   r   r   r   locals"create_variable_for_type_inferencer
   	append_op)r   r   op_typehelperout r   j/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/paddle/distributed/models/moe/utils.py_number_count   s   r   c                 C   sh   t  rt| ||d S d}t|fi t }|j|jd}|j|| g|g|d gdd|gid |S )a  
    Assign pos decides which tokens should be fetched belong to
    specially expert orderly.

    Args:
        x (Tensor): Tensor. Every element in the list must be a Tensor whose data type
            should be float16, float32, float64, int32 or int64.
        cum_count (Tensor): The cumulative sum tokens of counters. Every element in the list must be a Tensor whose
            data type should be int64.

    Returns:
        out (Tensor): Assemble numbers in the order of counters.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
            >>> import paddle
            >>> from paddle.distributed.models.moe import utils
            >>> number_count = [2, 0, 2, 0]
            >>> numbers = [[0, 2], [0, 2]]
            >>> number_count = paddle.to_tensor(number_count, dtype="int64")
            >>> numbers = paddle.to_tensor(numbers, dtype="int64")
            >>> num_cum = paddle.cumsum(number_count)
            >>> pos = utils._assign_pos(x=numbers, cum_count=num_cum)
            >>> print(pos)
            Tensor(shape=[4], dtype=int64, place=Place(gpu:0), stop_gradient=True,
            [2, 0, 3, 1])
    
assign_posr	   )X	cum_countZeff_num_lenr   )r   r   r   )r   r   r   r   r   r   r
   r   )xr   r   r   r   r   r   r   _assign_pos?   s   	r!      c                 C   s,   |dkrt  rt||| S tdtd)ab  
    random routing topk gate idx
    ```
        out = topk_idx
        for i in len(topk_idx):
            if topk * value[i][topk-1] < prob[i]:
                out[i][topk-1] = -1
    ```
    Args:
        topk_idx: gate idx, shape=(N, topk)
        topk_value: values, shape = topk_idx.shape
        prob: random prob, shape=(topk_idx.shape[0],)
    r"   z$Not supporting static graph mode nowzonly topk=2 is supported now)r   r   Zrandom_routingRuntimeError)Ztopk_idxZ
topk_valueZprobZtopkr   r   r   _random_routingq   s
   r$   c                 C   s\   t  r
t| ||S d}t|fi t }|j| jd}|j|| |dd|id|id |S )aq  
    limit the expert count by capacity.
    Args:
        expert_count (Tensor): Tensor. The input expert count whose data type should be int32 or int64.
        capacity (Tensor): Tensor. The input capacity whose data type should be int32 or int64 and the elements of capacity should be the same with expert_count.numel()/n_work.
        n_work (int): The number of the works.
    Returns:
        out (Tensor): The output expert count limit by capacity.
    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
            >>> import paddle
            >>> from paddle.distributed.models.moe import utils
            >>> expert_count = [1, 2, 2, 8, 3, 6]
            >>> capacity = [5, 5, 5]
            >>> n_work = 2
            >>> expert_count = paddle.to_tensor(expert_count, dtype="int64")
            >>> capacity = paddle.to_tensor(capacity, dtype="int64")
            >>> out = utils._limit_by_capacity(expert_count, capacity, n_work)
            >>> print(out)
            Tensor(shape=[6], dtype=int64, place=Place(gpu:0), stop_gradient=True,
            [1, 2, 2, 4, 3, 3])
    limit_by_capacityr	   )expert_countcapacityr   n_workerr   )r   r   r%   r   r   r   r
   r   )r&   r'   r(   r   r   r   r   r   r   _limit_by_capacity   s   r)   c                 C   s~   t  rt| |||S t| dddgd t|dddgd tdi t }|j| jd}|jd| |dd	|i||d
d |S )a  
    prune gate by capacity(only support CUDA)

    Args:
        gate_idx (Tensor): Represents the gate_id sequence corresponding to the input data with type int32, int64.
        expert_count (Tensor): The quantity value counted on the gate_id sequence of the input data with type int32, int64.
        n_worker(int, optional): The number of workers on the trainer with type int64.

    Returns:
        new_gate_idx (Tensor): The gate_id sequence corresponding to the new input data after passing through prune.

    Examples:
        .. code-block:: python

            >>> # doctest: +REQUIRES(env: DISTRIBUTED)
            >>> import paddle
            >>> from paddle.distributed.models.moe import utils
            >>> gate_idx = paddle.to_tensor([1, 3, 3, 3, 3, 2, 1, 1], dtype='int64')
            >>> expert_count = paddle.to_tensor([0, 3, 1, 3, 0, 0, 0, 0], dtype='int64')
            >>> n_worker = 1
            >>> n_expert = 8
            >>> new_gate_id = utils._prune_gate_by_capacity(
            ...     gate_idx, expert_count, n_expert, n_worker
            ... )
            >>> print(new_gate_id)
            Tensor(shape=[8], dtype=int64, place=Place(gpu:0), stop_gradient=True,
            [1, 3, 3, 3, -1, 2, 1, 1])
    GateIdxZint32Zint64z/paddle.distributed.utils.prune_gate_by_capacityExpertCountprune_gate_by_capacityr	   )r*   r+   Z
NewGateIdx)n_expertr(   r   N)r,   )	r   r   r,   r   r   r   r   r
   r   )Zgate_idxr&   r-   r(   r   Znew_gate_idxr   r   r   _prune_gate_by_capacity   s6   r.   N)r"   )Zpaddler   r   Zpaddle.common_ops_importr   Zpaddle.frameworkr   r   r   r   r!   r$   r)   r.   r   r   r   r   <module>   s   '
2,