o
    vqiRZ                     @   sL  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
mZ ddlmZmZmZ ddlZddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z& dd	l'm(Z( dd
l)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2m3Z3 ddl4m5Z5 ddl4m6Z6 ddl7m8Z8 ddgZ9					dLde:de:dee: dee; dee: dee: de:dee: fddZ<					dLde:de:dee: dee; dee: dee: de:dee: fd dZ=d!d" Z>dddddd#de:de:de:dee: dee; dee: de:dee: fd$d%Z?defd&e:de:de:fd'd(Z@de:fd)d*ZAde:fd+d,ZBd-d. ZCde:fd/d0ZDd1d2 ZEd3d4 ZF		dMd5d6ZG			dNd7e:de:d8e:d9eee:e:f  d:eHd;e;fd<d=ZId>d? ZJd@dA ZK		dOd7e:de:d8e:d:eHd9eee:e:f  d;e;fdBdCZLdDeHde;fdEdFZMdGe:fdHdIZNdGe:fdJdKZOdS )Pu8   
本文件实现了星河社区git文件的下载功能
    N)quote)DictOptionalUnion   )REPO_TYPE_MODELREPO_TYPE_DATASETREPO_TYPE_SUPPORTMODEL_ID_SEPARATORDEFAULT_AISTUDIO_GROUPTEMPORARY_FOLDER_NAMESTUDIO_GIT_HOST_DEFAULTDEFAULT_DATASET_REVISION'AISTUDIO_PARALLEL_DOWNLOAD_THRESHOLD_MBAISTUDIO_DOWNLOAD_PARALLELSAPI_FILE_DOWNLOAD_RETRY_TIMESAPI_FILE_DOWNLOAD_TIMEOUTAPI_FILE_DOWNLOAD_CHUNK_SIZE	FILE_HASHDEFAULT_MODEL_REVISION)InvalidParameterNotExistErrorRequestError)Path)request_aistudio_git_file_info)ThreadPoolExecutor)Retry)tqdm)ModelFileSystemCache)file_integrity_validationheader_fill)log)switch_downoad)post_repo_statistic_asyncmodel_file_downloadfile_downloadmasterFmodelrepo_id	file_pathrevisionlocal_files_only	local_dir	repo_typetokenreturnc              	   C   s   t | ||||||dS )u   
    增加入口
    r-   r*   r+   r,   r.   )r$   )r(   r)   r*   r+   r,   r-   r.    r1   a/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/aistudio_sdk/file_download.pyr%   1   s   c           	   
   C   sv   t   |du r	t}d|i}zt| || W n ty/ } ztd|  W Y d}~nd}~ww t| ||||||dS )
    download repo
    Npathzrequest.dot.fail: r0   )initr   r#   	Exceptionr!   debug_repo_file_download)	r(   r)   r*   r+   r,   r-   r.   actioner1   r1   r2   r$   H   s&   c                  C   s   t jddds=t jdt dd} |  d}t j|r?t|d}|  t jd< W d   dS 1 s6w   Y  dS dS dS )	u   初始化函数，从本地磁盘加载AI Studio认证令牌。

    Args:
        无参数。

    Returns:
        无返回值。
    AISTUDIO_ACCESS_TOKEN defaultAISTUDIO_CACHE_HOMEHOMEz/.cache/aistudio/.auth/tokenrN)osgetenvr4   existsopenreadstripenviron)Z
cache_homeZtoken_file_pathfiler1   r1   r2   r5   g   s   
"r5   r0   c             
   C   s  |st }|tvrtd|tf t| ||d\}}|r/||}	|	dur+td |	S tdd}
|t ks9|tkret	| |||}d|vrLt
d|| f ||rc|d }td	| d
 ||S |}
|
du rqt
d|| f |t ksy|tkr|
d }|
d }|du rt
d|| f | d\}}| }| }tjdtd}| dt|dd dt|dd dt|dd }|dkr|dt|dd 7 }ntd| t||
|||dS )r3   z'Invalid repo type: %s, only support: %s)r,   r-   Nz>File exists in local cache, but we're not sure it's up to datezCannot find the requested files in the cached path and outgoing traffic has been disabled. To enable look-ups and downloads online, set 'local_files_only' to False.r4   z"The file path: %s not exist in: %snamezFile z8 already in cache with identical hash, skip downloading!sha/STUDIO_GIT_HOSTr=   z/api/v1/repos/r<   )safez/media/r&   z?ref=zInvalid repo type r.   )r   r	   r   $create_temporary_directory_and_cacheZget_file_by_pathr!   warn
ValueErrorr   get_git_infor   rD   r7   Zget_file_by_infosplitrG   rB   rC   r   r   download_file)r(   r)   r-   r*   r+   r,   r.   temporary_cache_dircacheZcached_file_pathZfile_to_download_metaZ	repo_file	file_nameZfile_sha	user_name	repo_namegit_hostZurl_to_downloadr1   r1   r2   r8   {   s~   







r8   model_idc           	      C   s   |t krt }n|tkrt }ntd| t| \}}|dur-tj|t	}t
|}n |}t|tr8t|}tj|t	||}|dd}t
|||}tj|dd ||fS )z
    temp dir
    z7repo_type only support model and dataset, but now is : N.Z___Texist_ok)r   get_model_cache_rootr   get_dataset_cache_rootrR   model_id_to_group_owner_namerB   r4   joinr   r   
isinstancer   strreplacemakedirs)	r\   r,   r-   Zdefault_cache_rootgroup_or_ownerrJ   rV   rW   	cache_dirr1   r1   r2   rP      s*   


rP   c                   C      t jt dS )zYGet model cache root path.

    Returns:
        str: the aistudio model cache root.
    modelsrB   r4   rc   get_aistudio_cache_dirr1   r1   r1   r2   r`      s   r`   c                   C   s   t jt dt S )zGet aistudio cache dir, default location or
       setting with AISTUDIO_CACHE_HOME

    Returns:
        str: the aistudio cache root.
    r?   )rB   r4   
expanduserrC   get_default_aistudio_cache_dirr1   r1   r1   r2   rm      s   rm   c                  C   s   t jt ddd} | S )z.
    default base dir: '~/.cache/aistudio
    z.cacheZaistudioZhub)rB   r4   rn   r   homejoinpath)Zdefault_cache_dirr1   r1   r2   ro     s   ro   c                   C   rj   )zGet dataset raw file cache root path.
    if `AISTUDIO_CACHE_HOME` is set, return `AISTUDIO_CACHE_HOME/datasets`,
    else return `~/.cache/aistudio/hub/datasets`

    Returns:
        str: the aistudio dataset raw file cache root.
    Zdatasetsrl   r1   r1   r1   r2   ra     s   ra   c                 C   s<   t | v r| t d }| t d }||fS t}| }||fS )z
    get name
    r   r   )r
   rT   r   )r\   rh   rJ   r1   r1   r2   rb     s   rb   c                 C   sP   |  d\}}| }| }tjdtd}|stjddd}t||||||S )z
    get meta
    rL   rM   r=   r;   r<   )rT   rG   rB   rC   r   r   )r(   r)   r*   r.   rY   rZ   r[   r1   r1   r2   rS   $  s   
rS   c              
   C   s  t j||d }t jt j|dd |d }	 t|rn$t jdr5t	d|d  d t
d ntd	|d  d d
S qzznt|d}td d |d k rftdkrft| ||d ||d |d}	nt| ||d |d ||d}	|	s~W W t| d
S t j||d }
t|v r|t }|	d
ur|	|krtd t|
| nt|
| |||
W W t| S  ty } ztd| d|  W Y d
}~nd
}~ww W t| d
S t| w )z
    download
    r4   Tr^   z.lockZWAIT_UNTIL_DONEz[Download] WAITING 'z' due to active lock.
   z[Download] Skipping 'NrO      sizer   )	file_sizedisable_tqdm)ru   headersrv   zLMismatched real-time digest found, falling back to lump-sum hash computationz[Download] Error downloading z: )rB   r4   rc   rg   dirnameacquire_pid_lockrH   getr!   rQ   timesleepprintr    r   r   parallel_downloadhttp_get_model_filerelease_pid_lockr   r   Zput_filer6   )urlZ	file_metarV   rW   rv   r.   r)   	lock_pathrw   Zfile_digest	temp_fileZexpected_hashr:   r1   r1   r2   rU   2  sp   
	



 rU   r   rX   rw   ru   rv   c                 C   s  t j||}t jt j|dd tddd|dd| d d|dP}td d }g }	td||D ]}
t|
| d	 |d	 }|		|||
|| ||f q1t
}t|d
d}t|t|	 W d   n1 sfw   Y  W d   n1 suw   Y  t }t|||	| | S )z
    large file downlooad
    Tr^   Brs   r   Downloading []unitZ
unit_scaleZunit_divisortotalinitialZdescZleavedisabler   download)max_workersthread_name_prefixN)rB   r4   rc   rg   rx   r   r   rangeminappendr   r   listmapdownload_part_with_retryhashlibsha256merge_parts_to_file	hexdigest)r   r,   rX   rw   ru   rv   r)   progressZ	PART_SIZEtasksstartendZ	parallelsexecutorhash_sha256r1   r1   r2   r~   y  s>   	
	r~   c              
   C   s  t j| |}|}t j|}|r)d| d}t j| |}t j|r)t | t|dK}|D ]@}	|	d d|	d |	d f  }
t|
d}	 |d
t }|sRn|| |	| qHW d   n1 sgw   Y  t |
 q1W d   n1 s|w   Y  |rt
|| dS dS )z
    merge
    r]   z.tmpwbr   _%s_%s      rbT   N)rB   r4   rc   rD   removerE   rF   r   writeupdateshutilmove)r,   rX   r   r   target_pathZ
write_pathZuse_tempZtemp_file_nameZoutput_filetaskpart_file_nameZ	part_filechunkr1   r1   r2   r     s6   


r   c              
   C   s  | \}}}}}}}|du ri nt |}ttddgd}	|d||f  }
	 zd}tj|
rOt|
d}|dt	j
}|| W d   n1 sJw   Y  || }||krZW dS d	||f |d
< t|
dE}t|||}tj|d|td}|jstd|j  td|j |jtdD ]}|r|| |t| qW d   W dS 1 sw   Y  W dS  ty } z|	jd||d}	td||f  |	  W Y d}~nd}~ww q%)z
    download part
    Nr   GETr   backoff_factorallowed_methodsr   Tr   r   bytes=%s-%sRangeab+streamrw   timeoutzdownload res:zdownload.fail:
chunk_sizeerrorz-Downloading: %s failed, reason: %s will retry)copydeepcopyr   r   rB   r4   rD   rE   seekioSEEK_ENDr   r"   
switch_cdnrequestsrz   r   okr!   r7   contentr   status_codeiter_contentr   r   lenr6   	incrementr|   )paramsZmodel_file_pathr   r   r   r   rX   rw   get_headersretryr   partial_lengthfZdownload_startrA   r   r:   r1   r1   r2   r     sl   


r   c              
   C   s  t j||}t jt j|dd |du ri nt|}tt	 j
|d< t j||}t jt j|dd td|  d|  d}	t }
ttdd	gd
}	 ztddd|dkr]|nddd| d d|d}|dkrt|d |d W d   n1 sw   Y  	 W d   W nd}t j|rd}	t|d}|dtj}|| W d   n1 sw   Y  ||kr	 W d   W nd||d f |d< t|d<}t| ||} tj| d|td}|  |jtdD ]}|r|t | |!| |	s|
| qW d   n	1 sw   Y  W d   n	1 s(w   Y  W n" t"yO } zd}	|j#d	| |d}|$  W Y d}~nd}~ww qR|	rVdS |
% S )a  Download remote file, will retry 5 times before giving up on errors.

    Args:
        url(str):
            actual download url of the file
        local_dir(str):
            local directory where the downloaded file stores
        file_name(str):
            name of the file stored in `local_dir`
        file_size(int):
            The file size.
        cookies(CookieJar):
            cookies used to authentication the user, which is used for downloading private repos
        headers(Dict[str, str], optional):
            http headers to carry necessary info when requesting the remote file
        disable_tqdm(bool, optional): Disable the progress bar with tqdm.

    Raises:
        FileDownloadError: File download failed.

    Tr^   NzX-Request-IDzdownloading z to Fr   r   r   r   rs   r   r   r   r   zw+r   r   r   r   r   r   r   )&rB   r4   rc   rg   rx   r   r   re   uuiduuid4hexr!   r7   r   r   r   r   r   rE   r   rD   r   r   r   r"   r   r   rz   r   raise_for_statusr   r   r   r   r6   r   r|   r   )r   r,   rX   ru   rw   rv   r)   r   Ztemp_file_pathZ	has_retryr   r   r   r   r   rA   r   r:   r1   r1   r2   r     s   
	



/7r   pidc                 C   s2   zt | }| t jkW S  t jy   Y dS w )u-   判断进程是否存在且不是僵尸进程F)psutilProcessstatusZSTATUS_ZOMBIEZNoSuchProcess)r   pr1   r1   r2   is_process_alive]  s   
r   r   c                 C   s`  t t }z-t| tjtjB tjB }t|d}|| W d   W dS 1 s,w   Y  W dS  t	y   z@t| d}t
|  }W d   n1 sSw   Y  t|rgtd|  W Y dS td| d t|  t| W  Y S  ty } ztd	|  W Y d}~Y dS d}~ww  ty } ztd
|  W Y d}~dS d}~ww )z/Use atomic file creation to acquire a PID lock.wNTrA   z[Lock] File is locked by PID Fz[Lock] Stale lock from PID z, removing.z+[Lock] Error checking/removing stale lock: z [Lock] Cannot create lock file: )re   rB   getpidrE   O_CREATO_EXCLO_WRONLYfdopenr   FileExistsErrorintrF   rG   r   r}   r   ry   r6   )r   r   fdr   Zexisting_pidr:   r1   r1   r2   ry   g  s<   

ry   c              
   C   s   z?t j| r:t| d}t|  }W d   n1 sw   Y  |t  kr=t |  t	
d|   W dS W dS W dS  tyY } ztd|  W Y d}~dS d}~ww )z<Release the lock if it is still held by the current process.rA   Nz[Lock] Released lock z[Lock] Error releasing lock: )rB   r4   rD   rE   r   rF   rG   r   r   r!   r7   r6   r}   )r   r   r   r:   r1   r1   r2   r     s   
r   )r&   FNr'   N)FN)NNF)NF)P__doc__r   r{   r   r   errnor   r   r   r   urllib.parser   typingr   r   r   rB   configr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   errorsr   r   r   pathlibr   Zaistudio_sdk.requests.hubr   concurrent.futuresr   Zrequests.adaptersr   Z	tqdm.autor   Zutils.cachingr   Z
utils.utilr   r    Zaistudio_sdkr!   r"   Zaistudio_sdk.dotr#   __all__re   boolr%   r$   r5   r8   rP   r`   rm   ro   ra   rb   rS   rU   r   r~   r   r   r   r   ry   r   r1   r1   r1   r2   <module>   s"  D	

	
T
!		
J
)$6
f
