o
    ü* iÇ<  ã                   @  s˜   d dl mZ d dlZd dlmZmZmZmZ d dlZ	ddl
mZ ddlmZmZmZ G dd„ deee  ƒZG d	d
„ d
eed  ƒZG dd„ deƒZdS )é    )ÚannotationsN)ÚIterableÚIteratorÚSequenceÚSizedé   )ÚIterableDataset)ÚRandomSamplerÚSamplerÚSequenceSamplerc                   @  s\   e Zd ZU dZded< ded< ded< ded< 					
		
dddd„Zddd„Zddd„Zd	S )ÚBatchSamplera–  
    A base implement of batch sampler used by `paddle.io.DataLoader`
    which yield mini-batch indices(a list/tuple with length as
    mini-batch size and holds sample indices) iterably.

    Batch sampler used by :code:`paddle.io.DataLoader` should be a subclass
    of :code:`paddle.io.BatchSampler`, BatchSampler subclasses should
    implement following methods:

    :code:`__iter__`: return mini-batch indices iterably.

    :code:`__len__`: get mini-batch number in an epoch.


    Args:
        dataset(Dataset, optional): this should be an instance of a subclass of :ref:`api_paddle_io_Dataset` or
                :ref:`api_paddle_io_IterableDataset` or other python object which implemented
                :code:`__len__` for BatchSampler to get indices as the
                range of :attr:`dataset` length. Default None, disabled.
        sampler (Sampler, Iterable, optional): this should be a :ref:`api_paddle_io_Sample` or Iterable
                instance which implemented :code:`__iter__` to generate
                sample indices. :attr:`sampler` and :attr:`dataset`
                can not be set in the same time.  If :attr:`sampler`
                is set, :attr:`dataset` should not be set. Default None, disabled.
        shuffle(bool, optional): whether to shuffle indices order before generating
                batch indices. Default False, don't shuffle indices before generating batch indices.
        batch_size(int, optional): sample indice number in a mini-batch indices. default 1, each mini-batch includes 1 sample.
        drop_last(bool, optional): whether drop the last incomplete (less than 1 mini-batch) batch dataset. Default False, keep it.
    see :ref:`api_paddle_io_DataLoader`

    Returns:
        BatchSampler: an iterable object for indices iterating

    Examples:

        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.io import RandomSampler, BatchSampler, Dataset

            >>> np.random.seed(2023)
            >>> # init with dataset
            >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([784]).astype('float32')
            ...         label = np.random.randint(0, 9, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> bs = BatchSampler(dataset=RandomDataset(100),
            ...                     shuffle=False,
            ...                     batch_size=16,
            ...                     drop_last=False)
            ...
            >>> for batch_indices in bs:
            ...     print(batch_indices)
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
            ...
            [96, 97, 98, 99]
            >>> # init with sampler
            >>> sampler = RandomSampler(RandomDataset(100))
            >>> bs = BatchSampler(sampler=sampler,
            ...                     batch_size=8,
            ...                     drop_last=True)
            ...
            >>> for batch_indices in bs:
            ...     print(batch_indices)
            [56, 12, 68, 0, 82, 66, 91, 44]
            ...
            [53, 17, 22, 86, 52, 3, 92, 33]
    zSampler[int] | Iterable[int]ÚsamplerÚintÚ
batch_sizeÚboolÚshuffleÚ	drop_lastNFr   ÚdatasetúSized | NoneúSampler | Iterable[int] | NoneÚreturnÚNonec                 C  sü   |d u r&|d usJ dƒ‚t |ttfƒsJ dt|ƒ› ƒ‚|r"J dƒ‚|| _n,t |tƒr/J dƒ‚|d u s7J dƒ‚t |tƒsEJ dt|ƒ› ƒ‚|rMt|ƒ| _nt|ƒ| _t |t	ƒr[|dksbJ d|› ƒ‚|| _
|| _t |tƒsvJ d	t|ƒ› ƒ‚|| _d
| _d S )Nz'either dataset or sampler should be setz@sampler should be either paddle.io.Sampler or Iterable, but got z+shuffle should be False when sampler is setz1dataset should not be a paddle.io.IterableDatasetz'should not set both dataset and samplerz+shuffle should be a boolean value, but got r   z1batch_size should be a positive integer, but got z-drop_last should be a boolean value, but got r   )Ú
isinstancer
   r   Útyper   r   r   r	   r   r   r   r   r   Ú
_acc_steps)Úselfr   r   r   r   r   © r   ún/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/io/dataloader/batch_sampler.pyÚ__init__m   s:   
ÿÿÿÿ
ÿÿ
zBatchSampler.__init__úIterator[list[int]]c                 c  sb    | j | j }g }| jD ]}| |¡ t|ƒ|kr|V  g }q| js-t|ƒdkr/|V  d S d S d S )Nr   )r   r   r   ÚappendÚlenr   )r   Úlocal_batch_sizeÚbatch_indicesÚidxr   r   r   Ú__iter__™   s   €

€
ÿzBatchSampler.__iter__c                 C  s6   | j | j }t| jƒ}|t| j ƒ|d  7 }|| S ©Nr   )r   r   r!   r   r   r   ©r   r"   Únum_samplesr   r   r   Ú__len__¤   s   
zBatchSampler.__len__)NNFr   F)r   r   r   r   r   r   r   r   r   r   r   r   ©r   r   ©r   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ú__annotations__r   r%   r)   r   r   r   r   r      s   
 Mú
,r   c                   @  s4   e Zd ZU ded< ded< dddd	„Zddd„ZdS )Ú_InfiniteIterableSamplerr   r   r   r   r   r   r   c                 C  s"   t |tƒs	J dƒ‚|| _|| _d S )Nz:dataset should be an instance of paddle.io.IterableDataset)r   r   r   r   )r   r   r   r   r   r   r   ¯   s
   ÿ
z!_InfiniteIterableSampler.__init__úIterator[list[None]]c                 c  s    	 d g| j  V  q)N)r   ©r   r   r   r   r%   ¶   s   €ÿz!_InfiniteIterableSampler.__iter__N)r   )r   r   r   r   r   r   )r   r2   )r,   r-   r.   r0   r   r%   r   r   r   r   r1   «   s
   
 r1   c                   @  s„   e Zd ZU dZded< ded< ded< ded< ded	< ded
< ded< ded< 				dddd„Zd dd„Zd!dd„Zd"dd„ZdS )#ÚDistributedBatchSampleray	  Sampler that restricts data loading to a subset of the dataset.

    In such case, each process can pass a DistributedBatchSampler instance
    as a DataLoader sampler, and load a subset of the original dataset that
    is exclusive to it.

    .. note::
        Dataset is assumed to be of constant size.

    Args:
        dataset(Dataset): this could be an instance of subclass of :ref:`api_paddle_io_Dataset`
                     or other python object which implemented
                     `__len__` for BatchSampler to get indices of samples.
        batch_size(int): sample size of each mini-batch.
        num_replicas(int, optional): process number in distributed training.
            If :attr:`num_replicas` is None, :attr:`num_replicas` will be
            retrieved from :ref:`api_paddle_distributed_ParallelEnv` .
            Default None.
        rank(int, optional): the rank of the current process among :attr:`num_replicas`
            processes. If :attr:`rank` is None, :attr:`rank` is retrieved from
            :ref:`api_paddle_distributed_ParallelEnv`. Default None.
        shuffle(bool, optional): whether to shuffle indices order before generating
            batch indices. Default False.
        drop_last(bool, optional): whether drop the last incomplete(less than a mini-batch) batch dataset size.
            Default False.

    Returns:
        DistributedBatchSampler, return an iterable object for indices iterating.

    Examples:
        .. code-block:: python

            >>> import numpy as np

            >>> from paddle.io import Dataset, DistributedBatchSampler

            >>> # init with dataset
            >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([784]).astype('float32')
            ...         label = np.random.randint(0, 9, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> dataset = RandomDataset(100)
            >>> sampler = DistributedBatchSampler(dataset, batch_size=64)

            >>> for data in sampler:
            ...     # do something
            ...     break
    r   r   r   r   r   r   ÚnranksÚepochÚ
local_rankr(   Ú
total_sizeNFÚnum_replicasú
int | NoneÚrankr   r   r   c                 C  s  || _ t|tƒr|dksJ dƒ‚|| _t|tƒsJ dƒ‚|| _t|tƒs(J dƒ‚ddlm} |d urCt|tƒr;|dks?J dƒ‚|| _n|ƒ j| _|d ur]t|tƒrU|dksYJ dƒ‚|| _	n|ƒ j	| _	|| _
d| _tt t| j ƒd | j ¡ƒ| _| j| j | _d	| _d S )
Nr   z'batch_size should be a positive integerz!shuffle should be a boolean valuez$drop_last should be a boolean number)ÚParallelEnvz)num_replicas should be a positive integerz%rank should be a non-negative integerg      ð?r   )r   r   r   r   r   r   Zpaddle.distributedr<   r5   r7   r   r6   ÚmathÚceilr!   r(   r8   r   )r   r   r   r9   r;   r   r   r<   r   r   r   r   þ   s8   	ÿÿÿ
ÿ
 
z DistributedBatchSampler.__init__r   c           	      #  s8   ˆ j ˆ j }tˆ jƒ}t |¡ ¡ }ˆ jt|ƒ }|t|ƒkr)||d |… 7 }n||t 	|t|ƒ ¡ d |… 7 }t|ƒˆ jksCJ ‚ˆ j
rWtj ˆ j¡ 
|¡ ˆ  jd7  _‡ fdd„}ˆ jdkrf||ƒ}t|ƒˆ jksoJ ‚t|ƒ}g }|D ]}| |¡ t|ƒ|kr‰|V  g }qwˆ js˜t|ƒdkrš|V  d S d S d S )Nr   c                   s®   g }ˆ j ˆ jˆ j  }|ˆ j dksJ ‚|ˆ j }tˆ jˆ j t| ƒ| ˆ jˆ j ƒD ]}| | ||ˆ j … ¡ q+| t| ƒ| d … } | | ˆ j| ˆ jd | … ¡ |S )Nr   r   )r8   r   r5   Úranger7   r!   Úextend)ÚindicesZsubsampled_indicesZlast_batch_sizeZlast_local_batch_sizeÚir3   r   r   Ú_get_indices_by_batch_sizeA  s(   


ýýÿÿ	zDDistributedBatchSampler.__iter__.<locals>._get_indices_by_batch_sizer   )r   r   r!   r   ÚnpZarangeÚtolistr8   r=   r>   r   ÚrandomZRandomStater6   r5   r(   Úiterr    r   )	r   r"   r(   rA   Zpadding_sizerC   Z_sample_iterr#   r$   r   r3   r   r%   .  s:   €
ÿ

€
ÿz DistributedBatchSampler.__iter__c                 C  s2   | j | j }| j}|t| j ƒ|d  7 }|| S r&   )r   r   r(   r   r   r'   r   r   r   r)   i  s   zDistributedBatchSampler.__len__c                 C  s
   || _ dS )a¤  
        Sets the epoch number. When :attr:`shuffle=True`, this number is used
        as seeds of random numbers. By default, users may not set this, all
        replicas (workers) use a different random ordering for each epoch.
        If set same number at each epoch, this sampler will yield the same
        ordering at all epochs.

        Arguments:
            epoch (int): Epoch number.

        Examples:
            .. code-block:: python

                >>> import numpy as np

                >>> from paddle.io import Dataset, DistributedBatchSampler

                >>> # init with dataset
                >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
                ...     def __init__(self, num_samples):
                ...         self.num_samples = num_samples
                ...
                ...     def __getitem__(self, idx):
                ...         image = np.random.random([784]).astype('float32')
                ...         label = np.random.randint(0, 9, (1, )).astype('int64')
                ...         return image, label
                ...
                ...     def __len__(self):
                ...         return self.num_samples
                ...
                >>> dataset = RandomDataset(100)
                >>> sampler = DistributedBatchSampler(dataset, batch_size=64)

                >>> for epoch in range(10):
                ...     sampler.set_epoch(epoch)
        N)r6   )r   r6   r   r   r   Ú	set_epocho  s   
%z!DistributedBatchSampler.set_epoch)NNFF)r   r   r   r   r9   r:   r;   r:   r   r   r   r   r   r   r*   r+   )r6   r   r   r   )	r,   r-   r.   r/   r0   r   r%   r)   rH   r   r   r   r   r4   »   s$   
 9ù
0
;r4   )Ú
__future__r   r=   Úcollections.abcr   r   r   r   ÚnumpyrD   r   r   r   r	   r
   r   r   r   r1   r4   r   r   r   r   Ú<module>   s    