o
    * i>\                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	m
Z
mZmZmZmZmZ d dlZddlmZmZmZ ddlmZmZ ddlmZmZmZ dd	lmZmZ dd
lm Z m!Z!m"Z" e	rd dl#Z#d dl$m%Z%m&Z& d dl'mZ( d dlm)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ d dl0m1Z1 edZ2edZ3G dd deZ4dZ5da6da7da8d"ddZ9dd Z:dd Z;G dd dZ<G d d! d!Z=dS )#    )annotationsN)TYPE_CHECKINGAnyAnyStrCallableProtocolTypeVaroverload   )_current_expected_place_get_paddle_place_get_paddle_place_list)corein_dynamic_mode   )BatchSamplerIterableDatasetSubset)DistributedBatchSampler_InfiniteIterableSampler)_DataLoaderIterMultiProcess_DataLoaderIterSingleProcess_DatasetKind)MappingSequence)Tensor)	PlaceLike)_Place)_DataLoaderIterBase)Dataset_K_Vc                   @  sR   e Zd ZedddZedd	dZedddZedddZedddZdS )
_CollateFnbatch5Sequence[npt.NDArray[Any]] | Sequence[numbers.Number]returnnpt.NDArray[Any]c                 C     d S N selfr#   r)   r)   \/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/paddle/io/reader.py__call__C      z_CollateFn.__call__Sequence[Tensor]r   c                 C  r'   r(   r)   r*   r)   r)   r,   r-   H      Sequence[AnyStr]r   c                 C  r'   r(   r)   r*   r)   r)   r,   r-   K   r0   Sequence[Mapping[_K, _V]]Mapping[_K, _V]c                 C  r'   r(   r)   r*   r)   r)   r,   r-   N   r.   Sequence[Sequence[_V]]Sequence[_V]c                 C  r'   r(   r)   r*   r)   r)   r,   r-   S   r0   N)r#   r$   r%   r&   )r#   r/   r%   r   )r#   r1   r%   r   )r#   r2   r%   r3   )r#   r4   r%   r5   )__name__
__module____qualname__r	   r-   r)   r)   r)   r,   r"   B   s    r"   <   F  c                 C  s   | a |ad S r(   )USE_AUTOTUNETUNING_STEPS)Zuse_autotuneZtuning_stepsr)   r)   r,   set_autotune_configa   s   r=   c                  G  s:   t | dkrtS t | dkrt| d tsJ | d ad S )Nr   r   )lenUSE_PINNED_MEMORY
isinstancebool)argsr)   r)   r,   use_pinned_memoryh   s   rC   c                 C  sR   t | ttfs
| g} g }| D ]}t |tjs!t }|| |}|| q|S r(   )r@   listtupler   ZPlaceZ	set_placeappend)placesretptmpr)   r)   r,   _convert_placesq   s   
rK   c                   @  sD   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dS )AuToTunec                 C  s   || _ t d | _d S )Nr
   )loadermultiprocessing	cpu_countmax_num_worker)r+   rM   r)   r)   r,   __init__   s   zAuToTune.__init__c                 C  s,  t r|  s
| jjS |  }|d u r| jjS t }td tdt| jj  d}t	d}tdt| j
  d}|| j
k r|||_| |}|d |krT|}|}n| |||| j
}||krbn|}tdt| d t|  |d	7 }|| j
k sAtd
t|  tdtt |  d  |S )Nz(========= DataLoader Auto Tune =========zUser config for DataLoader: r   infz"Tuning Range for num_workers: 0 ~ g      ?znum_workers:  avg_cost: r
   z'auto_tune dataLoader best_num_workers: z AutoTuning Cost for DataLoader: z seconds)r;   need_autotunerM   num_workersget_autotune_loadertimeloggingdebugstrfloatrP   evaluate_reader_costis_bestinfo)r+   Zauto_tune_loaderZauto_tune_startZbest_num_workersZmin_costrU   avg_costZ
update_numr)   r)   r,   r-      sj   




zAuToTune.__call__c                 C  s   t jdks
t jdkrdS dS )Ndarwinwin32FT)sysplatformr+   r)   r)   r,   rT      s   zAuToTune.need_autotunec                 C  s*   t |t t|}t|tt|d}|S )N)indices)minr<   r>   r   rD   range)r+   dataset
batch_sizeZnum_samplessub_datasetr)   r)   r,   get_sub_dataset   s   zAuToTune.get_sub_datasetc                 C  s   t  | j}| jjj}t| jjtjjr:| jjj}| 	||}tjj||| jjj
| jjj| jjj| jjjd|_|S t| jjtjjr^| jjjj}| 	||}tjj||| jjjd|_|S d }|S )Nrh   ri   Znum_replicasZrankshuffle	drop_last)rh   ri   rn   )copyrM   batch_samplerri   r@   paddleior   rh   rk   ZnranksZ
local_rankrm   rn   r   ZsamplerZdata_source)r+   rM   ri   rh   rj   r)   r)   r,   rV      s6   

zAuToTune.get_autotune_loaderc                 C  s   g }d}t   }t|D ]\}}|t   |  t   }qt|dkr6t|dd  t|dd   }|S t|dd  t|dd   }|S )Nr   r
   )rW   	enumeraterF   r>   sum)r+   readerZcostsr_   startidatar)   r)   r,   r\      s   
  zAuToTune.evaluate_reader_costc           	      C  s   d}|d }d}||k rF|dk rF|| j _| |}tdt| d t|  |d7 }||d | k r6|S |d7 }|d9 }||k rF|dk s|S )Nr   r      zfor back num_workers: rS   gffffff?g?)rM   rU   r\   rX   rY   rZ   )	r+   ru   Zbest_workersZ	best_timeZnum_work_boundarysteprU   boundaryrW   r)   r)   r,   r]      s,   
zAuToTune.is_bestN)
r6   r7   r8   rQ   r-   rT   rk   rV   r\   r]   r)   r)   r)   r,   rL      s    :rL   c                   @  s   e Zd ZU dZded< ded< ded< ded< d	ed
< ded< ded< ded< ded< ded< ded< ded< ded< ded< ded< 															d0d1d'd(Zd2d)d*Zd3d,d-Zd3d.d/ZdS )4
DataLoadera!  
    DataLoader provides an iterator which iterates given dataset
    once by the batch_sampler.

    DataLoader supports single-process and multi-process data loading,
    multi-process workers will be used to load data asynchronously if
    :attr:`num_workers` is set as a positive number.

    DataLoader supports map-style dataset and iterable-style dataset.

    For map-style dataset(can get a sample from dataset with a given
    index), please see :code:`paddle.io.Dataset`.

    For iterable-style dataset(get samples from dataset iteratively,
    like a Python iterator), please see :code:`paddle.io.IterableDataset`.

    For :code:`batch_sampler` please see :code:`paddle.io.BatchSampler`

    Notes:
        GPU tensor operation is not supported in subprocess currently,
        please don't use GPU tensor operations in pipeline which will
        be performed in subprocess, such as dataset transforms, collate_fn,
        etc. Numpy array and CPU tensor operation is supported.

    **Disable automatic batching**

    In certain cases such as some NLP tasks, instead of automatic batching,
    handling batching manually in dataset is needed by users. For these
    cases, automatic batching is disabled if both :attr:`batch_size` and
    :attr:`batch_sampler` is set as None, each data got from :attr:`dataset`
    should be batched data and will be processed with function define by
    :attr:`collate_fn` or :attr:`default_collate_fn`.


    Notes:
        When automatic batching is disabled, :attr:`default_collate_fn` will
        do nothing to data from dataset.


    Args:
        dataset(Dataset): the dataset to load data from, should be an
            instance of subclass of :code:`paddle.io.Dataset` or
            :code:`paddle.io.IterableDataset`.
        feed_list (list(Tensor)|tuple(Tensor)|None, optional): feed Tensor list.
            The Tensors should be created by :code:`paddle.static.data()`.
            :attr:`feed_list` must be set if :attr:`return_list` is
            False. Default None.
        places(list(Place)|tuple(Place)|list(str)|None, optional): a list of Place,
            to put data onto, :attr:`places` can be None, if
            :attr:`places` is None, default place(CPUPlace or CUDAPlace(0))
            will be used. Default None. If ``places`` is list of string,
            the string in the list can be ``cpu``, ``gpu:x`` and ``gpu_pinned``,
            where ``x`` is the index of the GPUs.
        return_list (bool, optional): whether the return value on each device is
            presented as a list. If :attr:`return_list=False`, the return
            value on each device would be a dict of str -> Tensor, where
            the key of the dict is the name of each fed Tensors. If
            :attr:`return_list=True`, the return value on each device would
            be a list(Tensor). :attr:`return_list` can only be True
            in dynamic graph mode. Default True.
        batch_sampler(BatchSampler|None, optional): an instance of `paddle.io.BatchSampler`
            to generate batch indices to draw samples from :attr:`dataset`
            and combine a batch. Default None.
        batch_size(int|None, optional): sample number in a mini-batch, a substitution
            parameter for :attr:`batch_sampler`, if :attr:`batch_sampler`
            is not set, a default `paddle.io.BatchSampler` will be used
            and initialize by :attr:`batch_size`, :attr:`shuffle` and
            :attr:`drop_last`. Default 1.
        shuffle(bool, optional): whether to shuffle indices order before generate
            batch indices, a substitution parameter for :attr:`batch_sampler`
            see :attr:`batch_size`. Default False.
        drop_last(bool, optional): whether drop the last incomplete batch dataset size
            is not divisible by the batch size, a substitution parameter
            for :attr:`batch_sampler`, see :attr:`batch_size`. Default False
        collate_fn(Callable|None, optional): function to generate mini-batch data by merging
            the sample list, None for only stack each fields of sample in axis
            0(same as :attr::`np.stack(..., axis=0)`). Default None
        num_workers(int, optional): the number of subprocess to load data, 0 for no
            subprocess used and loading data in main process. Default 0
        use_buffer_reader (bool, optional): whether to use buffered reader.
            If use_buffer_reader=True, the DataLoader would prefetch
            batch data asynchronously, so it would speed up data feeding
            and occupies a little more CPU or GPU memory, i.e., the memory
            of one batch input data. Default True.
        prefetch_factor (int, optional): Number of batch data the DataLoader would prefetch
            if use_buffer_reader=True. Default 2.
        use_shared_memory (bool, optional): whether to use shared memory to speed up
            putting data into inter-process queue, set :attr:`use_shared_memory`
            as True only when the shared memory space on your machine(e.g.
            space of '/dev/shm' on Linux operating system) is large enough.
            Shared memory will only be enabled in multi-process mode(num_workers
            > 0). Default True.
        timeout(int, optional): the timeout value for getting data form output queue
            of subprocesses. Default 0.
        worker_init_fn(Callable|None, optional): init function which will be called with
            worker id on each subprocess starting if not set as None. Default
            None.
        persistent_workers(bool, optional): whether to keep the workers in the DataLoader. Default False.

    Returns:
        DataLoader: an iterable object for data iterating, each element of the generated data is a Tensor.

    Examples:

        .. code-block:: python

            >>> # doctest: +SOLO('can not use multiprocessing testing `paddle.io.DataLoader`')
            >>> import numpy as np

            >>> import paddle
            >>> import paddle.nn as nn
            >>> import paddle.nn.functional as F
            >>> from paddle.io import Dataset, BatchSampler, DataLoader

            >>> BATCH_NUM = 20
            >>> BATCH_SIZE = 16
            >>> EPOCH_NUM = 4

            >>> IMAGE_SIZE = 784
            >>> CLASS_NUM = 10

            >>> # define a random dataset
            >>> class RandomDataset(Dataset):  # type: ignore[type-arg]
            ...     def __init__(self, num_samples):
            ...         self.num_samples = num_samples
            ...
            ...     def __getitem__(self, idx):
            ...         image = np.random.random([IMAGE_SIZE]).astype('float32')
            ...         label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64')
            ...         return image, label
            ...
            ...     def __len__(self):
            ...         return self.num_samples
            ...
            >>> dataset = RandomDataset(BATCH_NUM * BATCH_SIZE)

            >>> class SimpleNet(nn.Layer):
            ...     def __init__(self):
            ...         super().__init__()
            ...         self.fc = nn.Linear(IMAGE_SIZE, CLASS_NUM)
            ...
            ...     def forward(self, image, label=None):
            ...         return self.fc(image)
            ...
            >>> simple_net = SimpleNet()
            >>> opt = paddle.optimizer.SGD(learning_rate=1e-3,
            ...                             parameters=simple_net.parameters())
            ...
            >>> loader = DataLoader(dataset,
            ...                     batch_size=BATCH_SIZE,
            ...                     shuffle=True,
            ...                     drop_last=True,
            ...                     num_workers=2)
            ...
            >>> for e in range(EPOCH_NUM):
            ...     for i, (image, label) in enumerate(loader()):
            ...         out = simple_net(image)
            ...         loss = F.cross_entropy(out, label)
            ...         avg_loss = paddle.mean(loss)
            ...         avg_loss.backward()
            ...         opt.minimize(avg_loss)
            ...         simple_net.clear_gradients()
            ...         print("Epoch {} batch {}: loss = {}".format(e, i, np.mean(loss.numpy())))

    Notes:
        For reading iterable dataset with multiprocess Dataloader,
        please see :code:`paddle.io.IterableDataset`
    rA   return_list_CollateFn | None
collate_fnuse_buffer_readerintprefetch_factorCallable[[int], None] | Noneworker_init_fnDataset[Any]rh   Sequence[Tensor] | None	feed_listzlist[_Place]rG   rU   r   dataset_kinduse_shared_memorytimeoutz.BatchSampler | _InfiniteIterableSampler | Nonerp   rn   auto_collate_batchNTr   Fr   r
   &PlaceLike | Sequence[PlaceLike] | NoneBatchSampler | Noneri   rm   persistent_workersr%   Nonec                 C  s  || _ |	| _|| _|| _|| _|| _|st s|d usJ d|| _|d u r)t }t	|t
tfr5t|}nt|}t|| _|
dksFJ d|
dkr[tjdksTtjdkr[td d}
|
| _|dksfJ d|| _|rr|
dkrrd| _|dkszJ d	|| _t	|trtj| _|rtd
| |d urtdntj| _|d ur|dkr|s|rJ d|| _d | _n+|d u rd | _d | _n |dksJ d|| _t	|trt||| _n	t ||||d| _t!j"j#j$% rFt!j"j&j'( }|d u rt!j") }t!j"j*t
t+d|dgd}d|j,vrtd|-dt!j". }|/d}t0| jj| | _t	| jtr2d}d}n| jj1}| jj2}t3|| j||||d| _|| _2| jd u| _4d| _5t rbt6 d u r^dnt6 | _5|| _7d | _8t9| : | _d S )Nz.feed_list should be set when return_list=Falser   z*num_workers should be a non-negative valuer`   ra   zDataLoader with multi-process mode is not supported on MacOs and Windows currently. Please use single-process mode with num_workers = 0 insteadz*prefetch_factor should be a positive valueFz&timeout should be a non-negative valuez5IterableDataset not support shuffle, but got shuffle=z0IterableDataset expect unspecified batch_samplerr   zJbatch_size/shuffle/drop_last should not be set when batch_sampler is givenzMbatch_size should be None or a positive value when batch_sampler is not given)rh   ri   rm   rn   Zdp)	dim_namesz;Auto-DP mode requires the mesh to include a 'dp' dimension.rl   T);r}   r   r   r   r   rh   r   r   r   r@   rD   rE   r   r   rK   rG   rb   rc   warningswarnrU   r   r   r   r   ITERr   
ValueErrorZMAPrp   ri   r   r   rq   distributedZauto_parallelZauto_dp_utilsZin_auto_dp_modeZfleetautoZget_meshZget_world_sizeZProcessMeshrg   r   Zget_rank_by_dim_and_process_idZget_rankZget_dim_sizer   rm   rn   r   r   Z
pin_memoryrC   _persistent_workers	_iteratorrL   r-   )r+   rh   r   rG   r}   rp   ri   rm   rn   r   rU   r   r   r   r   r   r   ZmeshZ	word_sizeZdp_rankZdp_world_sizer)   r)   r,   rQ     s   






	



	zDataLoader.__init__c                 C  s.   | j tjkr
td| jrt| jS t| jS )Nz'length of IterableDataset not supported)r   r   r   r   r   r>   rp   rh   rd   r)   r)   r,   __len__Z  s
   

zDataLoader.__len__r   c                 C  sJ   | j dkr	t| S | jr!| jd u rt| | _| jS | j  | jS t| S )Nr   )rU   r   r   r   r   _resetrd   r)   r)   r,   __iter__c  s   



zDataLoader.__iter__c                 C  s   |   S r(   )r   rd   r)   r)   r,   r-   o  s   zDataLoader.__call__)NNTNr   FFNr   Tr
   Tr   NF)"rh   r   r   r   rG   r   r}   rA   rp   r   ri   r   rm   rA   rn   rA   r   r~   rU   r   r   rA   r   r   r   rA   r   r   r   r   r   rA   r%   r   )r%   r   )r%   r   )	r6   r7   r8   __doc____annotations__rQ   r   r   r-   r)   r)   r)   r,   r|   	  sL   
  * 

	r|   )r:   )>
__future__r   ro   rX   rN   rb   rW   r   typingr   r   r   r   r   r   r	   rq   Zbase.frameworkr   r   r   Z	frameworkr   r   Z
dataloaderr   r   r   Zdataloader.batch_samplerr   r   Zdataloader.dataloader_iterr   r   r   numberscollections.abcr   r   Znumpy.typingZnptr   Zpaddle._typingr   Zpaddle._typing.device_liker   Z$paddle.io.dataloader.dataloader_iterr   Zpaddle.io.dataloader.datasetr   r    r!   r"   ZQUEUE_GET_TIMEOUTr?   r;   r<   r=   rC   rK   rL   r|   r)   r)   r)   r,   <module>   sH   $

	 
