o
    * i6                     @  s   d dl mZ d dlmZmZmZmZ d dlZddl	m
Z
 ddlmZ er<d dlmZmZmZmZ d dlmZ d dlmZ ed	ZG d
d dee ZG dd dee ZG dd dee ZdddZG dd dee ZG dd dee ZdS )    )annotations)TYPE_CHECKINGAnyGenericTypeVarN   )core)randperm)	GeneratorIteratorSequenceSized)Tensor_Tc                   @  sB   e Zd ZU dZded< ddddZdd
dZerdddZdS dS )Samplera  
    An abstract class to encapsulate methods and behaviors of samplers.

    All sampler used by :code:`paddle.io.BatchSampler` should be a subclass
    of :code:`paddle.io.Sampler`, BatchSampler subclasses should
    implement following methods:

    :code:`__iter__`: return sample index iterably, which iterate over indices
    of dataset elements

    :code:`__len__`: the number of sample in :attr:`data_source`


    Args:
        data_source(Dataset, optional): this could be an instance of
                :code:`paddle.io.Dataset` other Python object which
                implemented :code:`__len__` for Sampler to get indices
                as the range of :attr:`dataset` length. Default None.

    Returns:
        Sampler: an iterable object for sample indices iterating

    Examples:

        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.io import Dataset, Sampler

            >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([784]).astype('float32')
            ...         label = np.random.randint(0, 9, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> class MySampler(Sampler):  # type: ignore[type-arg]
            ...     def __init__(self, data_source):
            ...         self.data_source = data_source
            ...
            ...     def __iter__(self):
            ...         return iter(range(len(self.data_source)))  # type: ignore[arg-type]
            ...
            ...     def __len__(self):
            ...         return len(self.data_source)  # type: ignore[arg-type]
            ...
            >>> sampler = MySampler(data_source=RandomDataset(100))

            >>> for index in sampler:
            ...     print(index)
            0
            1
            2
            ...
            99

    see `paddle.io.BatchSampler`
    see `paddle.io.DataLoader`

    Sized | Nonedata_sourceNreturnNonec                 C  
   || _ d S Nr   selfr    r   h/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/io/dataloader/sampler.py__init__m      
zSampler.__init__Iterator[_T]c                 C  s   t r   )NotImplementedErrorr   r   r   r   __iter__p   s   zSampler.__iter__intc                 C  s   d S r   r   r    r   r   r   __len__w   s    zSampler.__len__r   )r   r   r   r   )r   r   r   r"   )	__name__
__module____qualname____doc____annotations__r   r!   r   r#   r   r   r   r   r   (   s   
 B
r   c                   @  8   e Zd ZU dZded< dddZdd	d
ZdddZdS )SequenceSamplera4  
    Iterate samples sequentially, yield :code:`0, 1, 2, ..., len(data_source) -1`
    generally,

    Args:
        data_source(Dataset): dataset to sample, this could be an
                instance of :code:`paddle.io.Dataset` other Python
                object which implemented :code:`__len__`.

    Returns:
        Sampler: a Sampler yield sample index sequentially

    Examples:

        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.io import Dataset, SequenceSampler

            >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([784]).astype('float32')
            ...         label = np.random.randint(0, 9, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> sampler = SequenceSampler(data_source=RandomDataset(100))

            >>> for index in sampler:
            ...     print(index)
            0
            1
            2
            ...
            99

    see `paddle.io.Sampler`
    r   r   r   r   c                 C  r   r   r   r   r   r   r   r      r   zSequenceSampler.__init__Iterator[int]c                 C  s   t tt| jS r   )iterrangelenr   r    r   r   r   r!      s   zSequenceSampler.__iter__r"   c                 C  
   t | jS r   )r/   r   r    r   r   r   r#      r   zSequenceSampler.__len__N)r   r   r   r   r   r,   r$   r%   r&   r'   r(   r)   r   r!   r#   r   r   r   r   r+   z   s   
 ,

r+   c                   @  s^   e Zd ZU dZded< ded< ded< 					ddddZedddZdddZdddZ	d	S )RandomSamplera  
    Iterate samples randomly, yield shuffled indices, if :attr:`replacement=False`,
    yield shuffled indices of the whole data source, if :attr:`replacement=True`,
    :attr:`num_samples` can set to specify the sample number to draw.

    Args:
        data_source(Dataset): dataset to sample, this could be an
                instance of :ref:`api_paddle_io_Dataset` or :ref:`api_paddle_io_IterableDataset` or other Python
                object which implemented :code:`__len__` to get indices as the range of :code:`dataset` length. Default None.
        replacement(bool, optional): If False, sample the whole dataset, If True,
                set :attr:`num_samples` for how many samples to draw. Default False.
        num_samples(int, optional): set sample number to draw. Default None, which is set to the length of `data_source`.
        generator(Generator, optional): specify a generator to sample the :code:`data_source`. Default None, disabled.

    Returns:
        RandomSampler: a Sampler yield sample index randomly.

    Examples:

        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.io import Dataset, RandomSampler

            >>> np.random.seed(2023)
            >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([784]).astype('float32')
            ...         label = np.random.randint(0, 9, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> sampler = RandomSampler(data_source=RandomDataset(100))

            >>> for index in sampler:
            ...     print(index)
            56
            12
            68
            ...
            87
    r   r   boolreplacement!Generator[int, None, None] | None	generatorFNnum_samples
int | Noner   r   c                 C  s   || _ || _|| _|| _t| jtstd| j | js3| jt| j kr3t	d| j dt| j  t| jt
r>| jdkrFt	d| j d S )Nz:expect boolean value for replacement, but got replacement=zunum_samples should be smaller than or equal to length of data_source when replacement is False, but got num_samples: z > data_source: r   z>num_samples should be a positive integer, but got num_samples=)r   r5   _num_samplesr7   
isinstancer4   	TypeErrorr8   r/   
ValueErrorr"   )r   r   r5   r8   r7   r   r   r   r      s2   zRandomSampler.__init__r"   c                 C  s   | j d u r
t| jS | j S r   )r:   r/   r   r    r   r   r   r8     s   

zRandomSampler.num_samplesr,   c              	   c  s    t | j}| jr)t| jD ]}zt| j}W n ty"   Y  d S w |V  qd S | jrCtj	j
t|| jdd D ]}|V  q;d S tj	j
t|| jdd D ]}|V  qRd S )NT)replaceF)r/   r   r7   r.   r8   nextStopIterationr5   nprandomchoiceZarangetolist)r   niindexr   r   r   r!     s4   
zRandomSampler.__iter__c                 C  s   | j S r   )r8   r    r   r   r   r#   !  s   zRandomSampler.__len__)FNN)
r   r   r5   r4   r8   r9   r7   r6   r   r   r$   r1   )
r%   r&   r'   r(   r)   r   propertyr8   r!   r#   r   r   r   r   r3      s   
 0
r3   Tc                 C  sJ  t | tjr
|  } t | ttfrt| } t | tjs J dt	| j
dks+J d| d| j
d f} t| dks@J dt| tjkrLJ dt| tjkrXJ dtj| dkd	d
}t|dkslJ d|syt||ksyJ d| | jd	d
 } g }t| j
d D ]}tj| j
d	 ||| | }|| qt|S )Nz=weights should be paddle.Tensor, numpy.ndarray, list or tuple   z$weights should be a 1-D or 2-D arrayg        z weights should be positive valuezweights should not be INFzweights should not be NaN   )Zaxisr   z#weights should have positive valueszUweights positive value number should not less than num_samples when replacement=False)r;   r   ZDenseTensornumpylisttuplerA   arrayZndarrayr/   shapereshapeallanyinfnansumr.   rB   rC   append)weightsr8   r5   Z	non_zerosZretsrF   retr   r   r   _weighted_sample%  s4   

rZ   c                   @  sL   e Zd ZU dZded< ded< ded< 	ddddZdddZdddZdS )WeightedRandomSampleraH  
    Random sample with given weights (probabilities), sample index will be in range
    [0, len(weights) - 1], if :attr:`replacement` is True, index can be sampled
    multiple times.

    Args:
        weights(numpy.ndarray|paddle.Tensor|list|tuple): sequence of weights,
                should be numpy array, paddle.Tensor, list or tuple
        num_samples(int): set sample number to draw from sampler.
        replacement(bool): Whether to draw sample with replacements, default True

    Returns:
        Sampler: a Sampler yield sample index randomly by given weights

    Examples:

        .. code-block:: python

            >>> import numpy as np
            >>> from paddle.io import WeightedRandomSampler

            >>> np.random.seed(2023)
            >>> sampler = WeightedRandomSampler(
            ...     weights=[0.1, 0.3, 0.5, 0.7, 0.2],
            ...     num_samples=5,
            ...     replacement=True
            ... )
            >>> for index in sampler:
            ...     print(index)
            2
            4
            3
            1
            1
    +npt.NDArray[Any] | Tensor | Sequence[float]rX   r"   r8   r4   r5   Tr   r   c                 C  sB   t |tr	|dkrtdt |tstd|| _|| _|| _d S )Nr   z(num_samples should be a positive integerz%replacement should be a boolean value)r;   r"   r=   r4   rX   r8   r5   )r   rX   r8   r5   r   r   r   r   n  s   

zWeightedRandomSampler.__init__r,   c                 C  s$   t | j| j| j}t|d S NrJ   )rZ   rX   r8   r5   r-   rQ   rD   )r   Zidxsr   r   r   r!   |  s   zWeightedRandomSampler.__iter__c                 C  s$   t | jj| jjd  }| j| S r]   )rA   prodrX   rP   r8   )r   mulr   r   r   r#     s   
zWeightedRandomSampler.__len__NT)rX   r\   r8   r"   r5   r4   r   r   r1   r$   r2   r   r   r   r   r[   E  s   
 $
r[   c                   @  r*   )SubsetRandomSamplera  
    Randomly sample elements from a given list of indices, without replacement.

    Args:
        indices (sequence): a sequence of indices

    Examples:

        .. code-block:: python

            >>> import paddle
            >>> from paddle.io import SubsetRandomSampler

            >>> paddle.seed(2023)
            >>> sampler = SubsetRandomSampler(indices=[1, 3, 5, 7, 9])

            >>> for index in sampler:
            ...     print(index)
            9
            3
            7
            5
            1

    Sequence[int]indicesr   r   c                 C  s   t |dkr
td|| _d S )Nr   zHThe length of `indices` in SubsetRandomSampler should be greater than 0.)r/   r=   rc   )r   rc   r   r   r   r     s
   
zSubsetRandomSampler.__init__r,   c                 c  s&    t t| jD ]}| j| V  qd S r   )r	   r/   rc   )r   rF   r   r   r   r!     s   zSubsetRandomSampler.__iter__r"   c                 C  r0   r   )r/   rc   r    r   r   r   r#     r   zSubsetRandomSampler.__len__N)rc   rb   r   r   r1   r$   r2   r   r   r   r   ra     s   
 

ra   r`   )
__future__r   typingr   r   r   r   rL   rA   Z	frameworkr   Ztensorr	   collections.abcr
   r   r   r   Znumpy.typingZnptZpaddler   r   r   r"   r+   r3   rZ   r[   ra   r   r   r   r   <module>   s    R9
r B