o
    pi                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZg ZejdZeeejsH	 e	 ZejedddZdd Zee dd	 Zdd
dZdd ZdejfddZejfddZdddZdS )    N~z.cachepaddledatasetc              
   C   sF   zt t W d S  ty" } z|jtjkr W Y d }~d S d }~ww N)osmakedirs	DATA_HOMEOSErrorerrnoEEXIST)pathexc r   \/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/paddle/dataset/common.pymust_mkdirs6   s   r   c                    sD   t  }t| d t fdddD ]}|| q   | S )Nrbc                      s
     dS )N   )readr   fr   r   <lambda>D   s   
 zmd5file.<locals>.<lambda>    )hashlibmd5openiterupdateclose	hexdigest)fnameZhash_md5chunkr   r   r   md5fileA   s   
r!   c              
   C   s  t d| }t|trt d| }tjt|}tj	|s't
| tj||d u r6| dd n|}tj	|rGt||krG|S d}d}tj	|rXt||ksQtj	|rmtjdt| d| d	 ||k rv|d
7 }n
td|  d| tjd| d|  d tjd ztjd| d dd}|jd}	|	d u rt|d}
t|j|
 W d    n1 sw   Y  n[t|dM}
d}t|	}	|	| d
 }|dkr|d nd
}d}tjjj|dd}|j|dD ]}|
| |d
7 }||i  || dkr|| qW d    n	1 sw   Y  W d    n	1 s+w   Y  W n t yD } zW Y d }~qKd }~ww tj	|rXt||krXtjd tj!"  |S )Nz^[a-zA-Z0-9_/\-]+$z^(?:(?!\.\.)[a-zA-Z0-9_/\.-])+$/r      zfile z  md5 
   zCannot download z within retry limit zCache file z not found, downloading z 
zBegin to download
GETT)timeoutZfollow_redirectszcontent-lengthwbr      item)name)
chunk_sizez
Download finished
)#rematchgroup
isinstancestrr   r   joinr   existsr   splitr!   sysstderrwriteRuntimeErrorhttpxstreamheadersgetr   shutilcopyfileobjrawintr   ZhapiZprogressbarProgressBarZ
iter_bytesr   	Exceptionstdoutflush)urlmodule_nameZmd5sumZ	save_namedirnamefilenameretryZretry_limitrtotal_lengthr   r-   Z
total_iterZlog_intervalZ	log_indexbardataer   r   r   downloadJ   s   




,
rP   c                  C   sJ   dd t tjD D ]} dt td|  v r"td|    q
d S )Nc                 S   s   g | ]	}| d s|qS )__)
startswith).0xr   r   r   
<listcomp>   s
    
zfetch_all.<locals>.<listcomp>fetchzpaddle.dataset.)dirr   r   	importlibimport_modulerV   )rG   r   r   r   	fetch_all   s   
rZ   z%05d.picklec           	   	   C   s   t |stdg }d}t|  D ]5\}}|| ||krF|| dkrFt|| d}||| g }|d7 }W d   n1 sAw   Y  q|rht|| d}||| W d   dS 1 saw   Y  dS dS )a  
    you can call the function as:

    split(paddle.dataset.cifar.train10(), line_count=1000,
        suffix="imikolov-train-%05d.pickle")

    the output files as:

    |-imikolov-train-00000.pickle
    |-imikolov-train-00001.pickle
    |- ...
    |-imikolov-train-00480.pickle

    :param reader: is a reader creator
    :param line_count: line count for each file
    :param suffix: the suffix for the output files, should contain "%d"
                means the id for each file. Default is "%05d.pickle"
    :param dumper: is a callable function that dump object to file, this
                function will be called as dumper(obj, f) and obj is the object
                will be dumped, f is a file object. Default is cPickle.dump.
    zdumper should be callable.r   wr&   N)callable	TypeError	enumerateappendr   )	reader
line_countsuffixZdumperlinesZindx_fidr   r   r   r   r5      s$   


"r5   c                    s    fdd}|S )a  
    Create a reader that yield element from the given files, select
    a file set according trainer count and trainer_id

    :param files_pattern: the files which generating by split(...)
    :param trainer_count: total trainer count
    :param trainer_id: the trainer rank id
    :param loader: is a callable function that load object from file, this
                function will be called as loader(f) and f is a file object.
                Default is cPickle.load
    c               	   3   s    t s	tdt } |   g }t| D ]\}}| kr.td|  || q|D ] }t|d}|}|E d H  W d    n1 sLw   Y  q1d S )Nzloader should be callable.zappend file: rK   )r\   r]   globsortr^   printr_   r   )	file_listZmy_file_listidxfnr   rc   files_patternloadertrainer_count
trainer_idr   r   r`      s$   

z$cluster_files_reader.<locals>.readerr   )rm   ro   rp   rn   r`   r   rl   r   cluster_files_reader   s   rq   Tc                 C   s8   | r
t j| r
| S |rtjj|||S t|  d)Nz& not exists and auto download disabled)r   r   r4   r   r   commonrP   
ValueError)r   rF   r   rG   rP   r   r   r   _check_exists_and_download   s
   rt   r   )T)r
   rf   r   rX   r   pickler.   r>   r6   tempfiler:   r   Zpaddle.dataset__all__r   
expanduserHOMEaccessW_OK
gettempdirr3   r   r   r!   rP   rZ   dumpr5   loadrq   rt   r   r   r   r   <module>   s8   
	D
'
!