o
    W+ iJ                     @   sx  d Z ddlZddlmZ ddlmZ ddlmZmZm	Z	m
Z
mZmZmZ ddlmZmZ ddlmZ ddlmZmZmZ dd	lmZmZ dd
lmZ ddlmZ e ZddgZeddG dd dZ eddG dd dZ!eddG dd dZ"eddG dd dZ#	d'deee$ef  de#fddZ%dede&fddZ'd(ded e$d!e&fd"d#Z(d$ed e$dee" fd%d&Z)dS ))z<Contains utilities to manage the ModelScope cache directory.    N)	dataclass)Path)Dict	FrozenSetListLiteralOptionalSetUnion)CacheNotFoundCorruptedCacheException)ModelFileSystemCache)convert_readable_sizeformat_timesincetabulate)REPO_TYPE_DATASETREPO_TYPE_MODEL)get_modelscope_cache_dir)
get_loggerz	.DS_Storez
._____tempT)frozenc                   @   s   e Zd ZU dZeed< eed< eed< eed< eed< eed< eed< e	d	efd
dZ
e	d	efddZe	d	efddZdS )CachedFileInfoa	  Frozen data structure holding information about a single cached file.

    Args:
        file_name (`str`):
            Name of the file. Example: `config.json`.
        file_path (`Path`):
            Path of the file in the `snapshots` directory. The file path is a symlink
            referring to a blob in the `blobs` folder.
        blob_path (`Path`):
            Path of the blob file. This is equivalent to `file_path.resolve()`.
        size_on_disk (`int`):
            Size of the blob file in bytes.
        blob_last_accessed (`float`):
            Timestamp of the last time the blob file has been accessed (from any
            revision).
        blob_last_modified (`float`):
            Timestamp of the last time the blob file has been modified/created.
    	file_name	file_pathfile_revision_hash	blob_pathsize_on_diskblob_last_accessedblob_last_modifiedreturnc                 C   
   t | jS )z
        (property) Timestamp of the last time the blob file has been accessed (from any
        revision), returned as a human-readable string.

        Example: "2 weeks ago".
        )r   r   self r"   h/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/modelscope/hub/cache_manager.pyblob_last_accessed_str4      
z%CachedFileInfo.blob_last_accessed_strc                 C   r   )z
        (property) Timestamp of the last time the blob file has been modified, returned
        as a human-readable string.

        Example: "2 weeks ago".
        )r   r   r    r"   r"   r#   blob_last_modified_str>   r%   z%CachedFileInfo.blob_last_modified_strc                 C   r   )zi
        (property) Size of the blob file as a human-readable string.

        Example: "42.2K".
        r   r   r    r"   r"   r#   size_on_disk_strH      
zCachedFileInfo.size_on_disk_strN)__name__
__module____qualname____doc__str__annotations__r   intfloatpropertyr$   r&   r(   r"   r"   r"   r#   r      s   
 		r   c                   @   st   e Zd ZU dZeed< eed< eed< ee	 ed< e
ed< edefdd	Zedefd
dZedefddZdS )CachedRevisionInfoa  Frozen data structure holding information about a revision.

    Args:
        commit_hash (`str`):
            Hash of the revision (unique).
            Example: `"9338f7b671827df886678df2bdd7cc7b4f36dffd"`.
        snapshot_path (`Path`):
            Path to the revision directory in the `snapshots` folder. It contains the
            exact tree structure as the repo on the Hub.
        files: (`FrozenSet[CachedFileInfo]`):
            Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
        size_on_disk (`int`):
            Sum of the blob file sizes that are symlink-ed by the revision.
        last_modified (`float`):
            Timestamp of the last time the revision has been created/modified.
    commit_hashsnapshot_pathr   fileslast_modifiedr   c                 C   r   )z
        (property) Timestamp of the last time the revision has been modified, returned
        as a human-readable string.

        Example: "2 weeks ago".
        r   r7   r    r"   r"   r#   last_modified_strl   r%   z$CachedRevisionInfo.last_modified_strc                 C   r   zn
        (property) Sum of the blob file sizes as a human-readable string.

        Example: "42.2K".
        r'   r    r"   r"   r#   r(   v   r)   z#CachedRevisionInfo.size_on_disk_strc                 C   r   )zC
        (property) Total number of files in the revision.
        )lenr6   r    r"   r"   r#   nb_files   s   
zCachedRevisionInfo.nb_filesN)r*   r+   r,   r-   r.   r/   r   r0   r   r   r1   r2   r9   r(   r<   r"   r"   r"   r#   r3   R   s   
 	r3   c                   @   s   e Zd ZU dZeed< eed< eed< eed< eed< ee	 ed< e
ed< e
ed	< ed
efddZed
efddZed
efddZdS )CachedRepoInfoa  Frozen data structure holding information about a cached repository.

    Args:
        repo_id (`str`):
            Repo id of the repo on the Hub. Example: `"damo/bert-base-chinese"`.
        repo_type (`Literal["dataset", "model"]`):
            Type of the cached repo.
        repo_path (`Path`):
            Local path to the cached repo.
        size_on_disk (`int`):
            Sum of the blob file sizes in the cached repo.
        nb_files (`int`):
            Total number of blob files in the cached repo.
        revisions (`FrozenSet[CachedRevisionInfo]`):
            Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
        last_accessed (`float`):
            Timestamp of the last time a blob file of the repo has been accessed.
        last_modified (`float`):
            Timestamp of the last time a blob file of the repo has been modified/created.
    repo_id	repo_type	repo_pathr   r<   	revisionslast_accessedr7   r   c                 C   r   )z
        (property) Last time a blob file of the repo has been accessed, returned as a
        human-readable string.

        Example: "2 weeks ago".
        )r   rB   r    r"   r"   r#   last_accessed_str   r%   z CachedRepoInfo.last_accessed_strc                 C   r   )z
        (property) Last time a blob file of the repo has been modified, returned as a
        human-readable string.

        Example: "2 weeks ago".
        r8   r    r"   r"   r#   r9      r%   z CachedRepoInfo.last_modified_strc                 C   r   r:   r'   r    r"   r"   r#   r(      r)   zCachedRepoInfo.size_on_disk_strN)r*   r+   r,   r-   r.   r/   r   r0   r   r3   r1   r2   rC   r9   r(   r"   r"   r"   r#   r=      s    
 		r=   c                   @   sR   e Zd ZU dZeed< ee ed< ee	 ed< e
defddZdefdd	Zd
S )ModelScopeCacheInfoa  Frozen data structure holding information about the entire cache-system.

    This data structure is returned by [`scan_cache_dir`] and is immutable.

    Args:
        size_on_disk (`int`):
            Sum of all valid repo sizes in the cache-system.
        repos (`FrozenSet[CachedRepoInfo]`):
            Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
            cache-system while scanning.
        warnings (`List[CorruptedCacheException]`):
            List of [`~CorruptedCacheException`] that occurred while scanning the cache.
            Those exceptions are captured so that the scan can continue. Corrupted repos
            are skipped from the scan.
    r   reposwarningsr   c                 C   r   )zp
        (property) Sum of all valid repo sizes in the cache-system as a human-readable
        string.
        r'   r    r"   r"   r#   r(      s   
z$ModelScopeCacheInfo.size_on_disk_strc                    sN   dt dtdtt fdd g d} fddt| jd	d
 dD }t||dS )a  Generate a detailed table from the [`ModelScopeCacheInfo`] object.

        Returns a table with a row per repo and revision (thus multiple rows can appear for a single repo), with columns
        "repo_id", "repo_type", "revision", "size_on_disk", "nb_files", "last_modified", "local_path".

        Example:
        ```py
        >>> from modelscope.hub.cache_manager import scan_cache_dir

        >>> ms_cache_info = scan_cache_dir()
        ModelScopeCacheInfo(...)

        >>> print(ms_cache_info.export_as_table())
        REPO ID                REPO TYPE REVISION   SIZE ON DISK NB FILES LAST_MODIFIED LOCAL PATH
        ---------------------- --------- ---------- ------------ -------- -------------  -------------------------------------------------------------
        damo/bert-base-chinese model     master             2.7M        5 1 week ago     ~/.cache/modelscope/hub/models--damo--bert-base-chinese/...
        damo/structured-bert   model     master             8.8K        1 1 week ago     ~/.cache/modelscope/hub/models--damo--structured-bert/...
        damo/t5-base           model     master           893.8M        4 7 months ago   ~/.cache/modelscope/hub/models--damo--t5-base/...
        ```

        Returns:
            `str`: The table as a string.
        reporevisionr   c              	   S   s.   | j | j|jd| j| j| j| jt| j	gS )zHFormat a single repo and revision into a list of strings for tabulation.z{:>12})
r>   r?   r4   formatr(   r<   rC   r9   r.   r@   )rG   rH   r"   r"   r#   format_repo_revision   s   
zAModelScopeCacheInfo.export_as_table.<locals>.format_repo_revision)zREPO IDz	REPO TYPEZREVISIONzSIZE ON DISKzNB FILESZLAST_ACCESSEDZLAST_MODIFIEDz
LOCAL PATHc                    s.   g | ]}t |jd d dD ]} ||qqS )c                 S      | j S N)r4   )rH   r"   r"   r#   <lambda>      z@ModelScopeCacheInfo.export_as_table.<locals>.<listcomp>.<lambda>key)sortedrA   ).0rG   rH   rJ   r"   r#   
<listcomp>  s    
z7ModelScopeCacheInfo.export_as_table.<locals>.<listcomp>c                 S   rK   rL   )r>   )rG   r"   r"   r#   rM     rN   z5ModelScopeCacheInfo.export_as_table.<locals>.<lambda>rO   )rowsheaders)r=   r3   r   r.   rQ   rE   r   )r!   Zcolumn_headersZ
table_datar"   rS   r#   export_as_table   s   

z#ModelScopeCacheInfo.export_as_tableN)r*   r+   r,   r-   r0   r/   r   r=   r   r   r2   r.   r(   rW   r"   r"   r"   r#   rD      s   
 rD   	cache_dirr   c                 C   s  | du rt  } t|   } |  std|  d| d|  r)td|  dt }g }| d }| d }| rP|	 rPt
|td\}}|| || | rj|	 rjt
|td\}}|| || t
| td	d
\}	}
||	 ||
 tt|tdd |D |dS )a  Scan the entire ModelScope cache-system and return a [`ModelScopeCacheInfo`] structure.

    Use `scan_cache_dir` to programmatically scan your cache-system. The cache
    will be scanned repo by repo. If a repo is corrupted, a [`~CorruptedCacheException`]
    will be thrown internally but captured and returned in the [`~ModelScopeCacheInfo`]
    structure. Only valid repos get a proper report.

    ```py
    >>> from modelscope.hub.utils import scan_cache_dir

    >>> ms_cache_info = scan_cache_dir()
    ModelScopeCacheInfo(
        size_on_disk=3398085269,
        repos=frozenset({
            CachedRepoInfo(
                repo_id='damo/t5-small',
                repo_type='model',
                repo_path=PosixPath(...),
                size_on_disk=970726914,
                nb_files=11,
                revisions=frozenset({
                    CachedRevisionInfo(
                        commit_hash='master',
                        size_on_disk=970726339,
                        snapshot_path=PosixPath(...),
                        files=frozenset({
                            CachedFileInfo(
                                file_name='config.json',
                                size_on_disk=1197
                                file_path=PosixPath(...),
                                blob_path=PosixPath(...),
                            ),
                            CachedFileInfo(...),
                            ...
                        }),
                    ),
                    CachedRevisionInfo(...),
                    ...
                }),
            ),
            CachedRepoInfo(...),
            ...
        }),
        warnings=[
            CorruptedCacheException("Snapshots dir doesn't exist in cached repo: ..."),
            CorruptedCacheException(...),
            ...
        ],
    )
    ```

    Args:
        cache_dir (`str` or `Path`, `optional`):
            Cache directory to scan. Defaults to the default ModelScope cache directory.

    Raises:
        `CacheNotFound`: If the cache directory does not exist.
        `ValueError`: If the cache directory is a file, instead of a directory.

    Returns: a [`ModelScopeCacheInfo`] object.
    NzCache directory not found: zQ. Please use `cache_dir` argument or set `MODELSCOPE_CACHE` environment variable.)rX   z1Scan cache expects a directory but found a file: modelsdatasetsr?   T)r?   inplacec                 s       | ]}|j V  qd S rL   )r   )rR   rG   r"   r"   r#   	<genexpr>      z!scan_cache_dir.<locals>.<genexpr>)rE   r   rF   )r   r   
expanduserresolveexistsr   is_file
ValueErrorsetis_dir	_scan_dirr   updateextendr   rD   	frozensetsum)rX   rE   rF   Z	model_dirZdataset_dirZmodel_reposZmodel_warningsZdataset_reposZdataset_warningsZother_reposZother_warningsr"   r"   r#   scan_cache_dir#  sL   ?










rl   dirc                 C   s6   |   sdS |  sdS |  rdS | jtv rdS dS )z+Check if a directory is valid for scanning.FT)rb   rf   
is_symlinknameFILES_TO_IGNORE)rm   r"   r"   r#   _is_valid_dir  s   
rq   Fr?   r\   c           	      C   s   t  }g }|  D ]C}|r|jdv rq	t|sq	| D ]/}t|s#qzt||d}|dur3|| W q tyK } z|| W Y d}~qd}~ww q	||fS )zWScan a directory for cached repos and return a set of [`~CachedRepoInfo`] and warnings.)rY   rZ   Zhubr[   N)re   iterdirro   rq   _scan_cached_repoaddr   append)	rm   r?   r\   rE   rF   Z	owner_dirZname_dirinfoer"   r"   r#   rg     s*   
	rg   r@   c                    s  |   std|  ztt| }|j}|j}| dd}|dkr'W dS W n ty; } ztd| d}~ww i  t	 }|D ]?}t
j| |d }	|dd	}
t
j|	s[qCt|	}|  |< |tt
j|d ||
 | j| | j | jd
 qCd}|rd|v r|dd dd }|rt fdd|D }n|  j}t|t|t fdd|D | |d} rtdd   D }tdd   D }n
|  }|j}|j}tt || |t|gtdd   D ||dS )zScan a single cache repo and return information about it.

    Any unexpected behavior will raise a [`~CorruptedCacheException`].
    zRepo path is not a directory: Z___.unknownNz"Failed to load cache information: r   ZRevision )r   r   r   r   r   r   r   Zmasterz	Revision:   ,r   c                 3       | ]	} |j  jV  qd S rL   )r   st_mtimerR   fileZ
blob_statsr"   r#   r^         z$_scan_cached_repo.<locals>.<genexpr>c                 3   r}   rL   )r   st_sizer   r   r"   r#   r^     r   )r4   r6   r   r5   r7   c                 s   r]   rL   )st_atimerR   statr"   r"   r#   r^     r_   c                 s   r]   rL   )r~   r   r"   r"   r#   r^     r_   c                 s   r]   rL   )r   r   r"   r"   r#   r^     r_   )r<   r>   r@   r?   rA   r   rB   r7   )rf   r   r   r.   cached_filescached_model_revisionZget_model_idreplace	Exceptionre   ospathjoingetrb   r   r   rt   r   basenamer   r   r~   splitmaxr3   rj   rk   valuesr=   r;   )r@   r?   cacher   r   r>   rw   Zcached_files_infoZcached_filer   r   r   Zrevision_hashZrevision_last_modifiedZcached_revisionZrepo_last_accessedZrepo_last_modifiedZ
repo_statsr"   r   r#   rs     s   


rs   rL   )F)*r-   r   dataclassesr   pathlibr   typingr   r   r   r   r   r	   r
   Zmodelscope.hub.errorsr   r   Zmodelscope.hub.utils.cachingr   Zmodelscope.hub.utils.utilsr   r   r   Zmodelscope.utils.constantr   r   Zmodelscope.utils.file_utilsr   Zmodelscope.utils.loggerr   loggerrp   r   r3   r=   rD   r.   rl   boolrq   rg   rs   r"   r"   r"   r#   <module>   sD    $;4>]
s