o
    rqi2a                  %   @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ d d	lmZ d
dlmZmZ d
dl m!Z! d
dl"m#Z# d
dl$m%Z%m&Z&m'Z' d
dl(m)Z) d
dl*m+Z+m,Z,m-Z-m.Z. e Z/dddddddddddddeddfde0dee0 dee0e	df deeee0f  dee1 dee deee0ee0 f  deee0ee0 f  dee0 deeee0 e0f  deeee0 e0f  de2de0d ee0 d!ee1 d"eee!  d#e0f"d$d%Z3edddddddddddfd&e0dee0 dee0e	df dee0 deeee0f  dee1 dee deee0ee0 f  deee0ee0 f  deeee0 e0f  deeee0 e0f  d!ee1 de2d#e0fd'd(Z4dedddddddddddd)de0d ee0 dee0 dee0e	df deeee0f  dee1 dee deee0ee0 f  deee0ee0 f  dee0 deeee0 e0f  deeee0 e0f  de2d"eee!  fd*d+Z5d,d- Z6d.e0fd/d0Z7d1ee0ee0 f fd2d3Z8d1ee0 fd4d5Z9deddddddddf
d6ee0 d7e)d8e0de0d9ed:e0d;e0d ee0 dee0 dee deee0ee0 f  deee0ee0 f  deeee0 e0f  deeee0 e0f  de2d<ee0 d"eee!  f"d=d>Z:dS )?    N)nullcontext)	CookieJarPath)DictListOptionalTypeUnion)DEFAULT_DATASET_REVISIONDEFAULT_MODEL_REVISIONINTRA_CLOUD_ACCELERATIONREPO_TYPE_DATASETREPO_TYPE_MODELREPO_TYPE_SUPPORT)get_modelscope_cache_dir)
get_logger)thread_executor   )HubApiModelScopeConfig)ProgressCallback)InvalidParameter)$create_temporary_directory_and_cachedownload_fileget_file_download_url)ModelFileSystemCache)get_model_masked_directorymodel_id_to_group_owner_name	strtoboolweak_file_lockF   model_idrevision	cache_dir
user_agentlocal_files_onlycookiesignore_file_patternallow_file_pattern	local_dirallow_patternsignore_patternsmax_workersrepo_id	repo_typeenable_file_lockprogress_callbacksreturnc                 C   s  |p| }|s
t d|tvrt d| dt |du r$|tkr"tnt}|du r1ttjdd}|rY|dur9|nt	 }tj
tj|ddd	 tj|d|d
d}t|}nt }| t|||||||||||
|	||dW  d   S 1 s{w   Y  dS )a  Download all files of a repo.
    Downloads a whole snapshot of a repo's files at the specified revision. This
    is useful when you want all files from a repo, because you don't know which
    ones you will need a priori. All files are nested inside a folder in order
    to keep their actual filename relative to that folder.

    An alternative would be to just clone a repo but this would require that the
    user always has git and git-lfs installed, and properly configured.

    Args:
        repo_id (str): A user or an organization name and a repo name separated by a `/`.
        model_id (str): A user or an organization name and a model name separated by a `/`.
            if `repo_id` is provided, `model_id` will be ignored.
        repo_type (str, optional): The type of the repo, either 'model' or 'dataset'.
        revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a
            commit hash. NOTE: currently only branch and tag name is supported
        cache_dir (str, Path, optional): Path to the folder where cached files are stored, model will
            be save as cache_dir/model_id/THE_MODEL_FILES.
        user_agent (str, dict, optional): The user-agent info in the form of a dictionary or a string.
        local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        cookies (CookieJar, optional): The cookie of the request, default None.
        ignore_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be ignored in downloading, like exact file names or file extensions.
        allow_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be downloading, like exact file names or file extensions.
        local_dir (str, optional): Specific local directory path to which the file will be downloaded.
        allow_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, only files matching at least one pattern are downloaded, priority over allow_file_pattern.
            For hugging-face compatibility.
        ignore_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern.
            For hugging-face compatibility.
        max_workers (`int`): The maximum number of workers to download files, default 8.
        enable_file_lock (`bool`): Enable file lock, this is useful in multiprocessing downloading, default `True`.
            If you find something wrong with file lock and have a problem modifying your code,
            change `MODELSCOPE_HUB_FILE_LOCK` env to `false`.
        progress_callbacks (`List[Type[ProgressCallback]]`, **optional**, default to `None`):
            progress callbacks to track the download progress.
    Raises:
        ValueError: the value details.

    Returns:
        str: Local folder path (string) of repo snapshot

    Note:
        Raises the following errors:
        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
        if `use_auth_token=True` and the token cannot be found.
        - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
        ETag cannot be determined.
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        if some parameter value is invalid
    z*Please provide a valid model_id or repo_idInvalid repo type: z, only support: NMODELSCOPE_HUB_FILE_LOCKtrue.lockTexist_ok/___)r/   r#   r$   r%   r&   r'   r(   r)   r*   r,   r+   r-   r1   )
ValueErrorr   r   r   r   r   osenvirongetr   makedirspathjoinreplacer    r   _snapshot_download)r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   system_cache	lock_filecontext rG   g/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/modelscope/hub/snapshot_download.pysnapshot_download!   sN   I


$rI   
dataset_idc                 C   s   |du rt tjdd}|r5|dur|nt }tjtj|ddd tj|d| dd}t	|}nt
 }| t| t|||||||||
|	|d	W  d   S 1 sVw   Y  dS )
aZ  Download raw files of a dataset.
    Downloads all files at the specified revision. This
    is useful when you want all files from a dataset, because you don't know which
    ones you will need a priori. All files are nested inside a folder in order
    to keep their actual filename relative to that folder.

    An alternative would be to just clone a dataset but this would require that the
    user always has git and git-lfs installed, and properly configured.

    Args:
        dataset_id (str): A user or an organization name and a dataset name separated by a `/`.
        revision (str, optional): An optional Git revision id which can be a branch name, a tag, or a
            commit hash. NOTE: currently only branch and tag name is supported
        cache_dir (str, Path, optional): Path to the folder where cached files are stored, dataset will
            be save as cache_dir/dataset_id/THE_DATASET_FILES.
        local_dir (str, optional): Specific local directory path to which the file will be downloaded.
        user_agent (str, dict, optional): The user-agent info in the form of a dictionary or a string.
        local_files_only (bool, optional): If `True`, avoid downloading the file and return the path to the
            local cached file if it exists.
        cookies (CookieJar, optional): The cookie of the request, default None.
        ignore_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be ignored in downloading, like exact file names or file extensions.
            Use regression is deprecated.
        allow_file_pattern (`str` or `List`, *optional*, default to `None`):
            Any file pattern to be downloading, like exact file names or file extensions.
        allow_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, only files matching at least one pattern are downloaded, priority over allow_file_pattern.
            For hugging-face compatibility.
        ignore_patterns (`str` or `List`, *optional*, default to `None`):
            If provided, files matching any of the patterns are not downloaded, priority over ignore_file_pattern.
            For hugging-face compatibility.
        enable_file_lock (`bool`): Enable file lock, this is useful in multiprocessing downloading, default `True`.
            If you find something wrong with file lock and have a problem modifying your code,
            change `MODELSCOPE_HUB_FILE_LOCK` env to `false`.
        max_workers (`int`): The maximum number of workers to download files, default 8.
    Raises:
        ValueError: the value details.

    Returns:
        str: Local folder path (string) of repo snapshot

    Note:
        Raises the following errors:
        - [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
        if `use_auth_token=True` and the token cannot be found.
        - [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError) if
        ETag cannot be determined.
        - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        if some parameter value is invalid
    Nr4   r5   r6   Tr7   r9   r:   )r/   r#   r$   r%   r&   r'   r(   r)   r*   r,   r+   r-   )r   r<   r=   r>   r   r?   r@   rA   rB   r    r   rC   r   )rJ   r#   r$   r*   r%   r&   r'   r(   r)   r+   r,   r0   r-   rD   rE   rF   rG   rG   rH   dataset_snapshot_download   s:   A


$rK   )r/   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r1   c                C   s  |st }|tvrtd|tf t| |	||d\}}|d ur |nt }|r;t|jdkr0tdt	d|  |
 S tj|dtt d}tdkrdtd	pUt  }|rdtd
|   ||d< t }|j| |d}|d u rvt }|t krJ|	rtj|	}n|rtjj|g| dR  }ntjj|dg| dR  }td| d|  |j| |||d}|d }dtjv ri |ddi}ni |ddi}|jd ur|j|d< |j | |d|d u rdn|||d}t!|||| |d d ||||||||
|||d d| v rIt"|| }tj#|rtd ntd| d ztj$tj||dd  W n t%yH   t	d!| d"tj| d Y nsw nq|t&kr|	rYtj|	}n|rjtjj|g| dR  }ntjj|d#g| dR  }td$|  t'| \}}|pt(}td% t)|| ||}|d u rt*d&|   d S t!|||| |||||||||||
|||d |j+|d' |
 }|S )(Nz'Invalid repo type: %s, only support: %s)r*   r$   r/   r   zCannot find the requested files in the cached path and outgoing traffic has been disabled. To enable look-ups and downloads online, set 'local_files_only' to False.z6We can not confirm the cached file is for revision: %s)r%   )z
user-agentzsnapshot-identifierr5   ZINTRA_CLOUD_ACCELERATION_REGIONz6Intra-cloud acceleration enabled for downloading from zx-aliyun-region-id)r.   r/   r9   modelszDownloading Model from z to directory: )r#   r'   endpointZRevisionZCI_TESTzsnapshot-ci-testTrueZSnapshotcached_model_revisionTF)r"   r#   	recursiveZuse_cookiesheadersrM   )
r/   r#   r'   r(   r)   r,   r+   r-   rM   r1   .z3Target directory already exists, skipping creation.zCreating symbolic link [z].)target_is_directoryzFailed to create symbolic link z for Zdatasetsz"Downloading Dataset to directory: z"Fetching dataset repo file list...z*Failed to retrieve file list for dataset: )Zrevision_info),r   r   r   r   r   lencached_filesr;   loggerwarningZget_root_locationr   Zget_user_agentstruuiduuid4r   r<   getenvr   Z!_get_internal_acceleration_domaininfoZget_endpoint_for_readZget_cookiesr@   abspathrA   splitprintZget_valid_revision_detailr=   rO   Zget_model_files_download_file_listsr   existssymlinkOSErrorr   r   r   fetch_repo_fileserrorZsave_model_version)r.   r/   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r1   temporary_cache_dircacherD   rQ   Z	region_id_apirM   	directoryZrevision_detailZsnapshot_header
repo_filesZmasked_directorygroup_or_ownernameZcache_root_pathrG   rG   rH   rC      s0  













rC   c           	   
   C   s   d}d}g }	 z| j ||dd|||d}W n ty0 } ztd|  W Y d }~|S d }~ww || t||k r?	 |S |d7 }q)Nr      Tr9   )r.   r#   	root_pathrP   page_number	page_sizerM   zError fetching dataset files: )Zget_dataset_files	ExceptionrV   re   extendrT   )	rh   r.   r#   rM   ro   rp   rj   Zdataset_fileserG   rG   rH   rd     s4   

	
rd   patternc                 C   s&   zt |  W dS  ty   Y dS w )NTF)recompileBaseException)rt   rG   rG   rH   _is_valid_regex  s   
rx   patternsc                 C   s*   t | tr| g} | d urdd | D } | S )Nc                 S   s"   g | ]}| d s|n|d qS )r9   *)endswith).0itemrG   rG   rH   
<listcomp>  s    z'_normalize_patterns.<locals>.<listcomp>)
isinstancerX   )ry   rG   rG   rH   _normalize_patterns  s   
r   c                 C   s0   | d urg }| D ]}t |r|| q|S d S )N)rx   append)ry   Zregex_patternsr}   rG   rG   rH   _get_valid_regex_pattern  s   
r   rj   rg   rf   apirl   rk   rM   c                    s  t |}t |}t |}t |}t|}g }| D ]d dkr!qzm|r1tfdd|D r1W q|r@tfdd|D r@W q|rOtfdd|D rOW q|d urb|rbtfdd|D sbW q|d uru|rutfd	d|D suW qrtjd
 }td| d W qW n t	y } zt
d|  W Y d }~qd }~ww | qt|dd 	
fdd}t|dkrtdt| d || td
 d	 d d S d S )Nr	   treec                       g | ]
}t   d  |qS r   fnmatchr|   rt   	repo_filerG   rH   r~         z(_download_file_lists.<locals>.<listcomp>c                    r   r   r   r   r   rG   rH   r~     r   c                    s    g | ]}t | d  duqS )NameN)ru   searchr   r   rG   rH   r~     s    c                 3        | ]}t   d  |V  qdS r   Nr   r   r   rG   rH   	<genexpr>  
    
z'_download_file_lists.<locals>.<genexpr>c                 3   r   r   r   r   r   rG   rH   r     r   r   zFile z8 already in cache with identical hash, skip downloading!z The file pattern is invalid : %sF)r-   disable_tqdmc              
      sp   	t krt| d 
d}n	tkr  j| d 
d}n
td	 dt t|| dd d S )Nr   )r"   	file_pathr#   rM   )	file_nameZdataset_name	namespacer#   rM   r3   z, supported types: F)r   r1   )r   r   r   Zget_dataset_file_urlr   r   r   )r   url)r   rg   r'   rM   rk   rQ   rl   r1   r.   r/   r#   rf   rG   rH   _download_single_file(  s8   
z3_download_file_lists.<locals>._download_single_filer   zGot z files, start to download ...z	Download z 'z' successfully.)r   r   anyra   r<   r@   basenamerV   debugrq   rW   r   r   rT   r\   )rj   rg   rf   r.   r   rl   rk   rQ   r/   r#   r'   r(   r)   r+   r,   r-   rM   r1   Zignore_regex_patternZfiltered_repo_filesr   rs   r   rG   )r   rg   r'   rM   rk   rQ   rl   r1   r   r.   r/   r#   rf   rH   r`     sj   


$r`   );r   r<   ru   rY   
contextlibr   http.cookiejarr   pathlibr   typingr   r   r   r	   r
   Zmodelscope.utils.constantr   r   r   r   r   r   Zmodelscope.utils.file_utilsr   Zmodelscope.utils.loggerr   Zmodelscope.utils.thread_utilsr   r   r   r   callbackr   errorsr   Zfile_downloadr   r   r   Zutils.cachingr   Zutils.utilsr   r   r   r    rV   rX   boolintrI   rK   rC   rd   rx   r   r   r`   rG   rG   rG   rH   <module>   s   	


v	

b	


 1
	

