o
    yqi6                     @   s  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ d	d
lmZmZmZmZmZmZmZ d	dlmZ d	dlmZmZ e
r^d	dl m!Z! e"e#Z$e%dZ&dZ'dddZ(eG dd dZ)e				dBdee) dee* de*de*dee* dee* dee+e*e*f  dee,e*  de-e,e+ e,e+ ee* f fddZ.G dd  d eZ/G d!d" d"eZ0			dCd#d$d%e+dee* dee+e*e*f  dee* ddfd&d'Z1d(e+fd)d*Z2d+e+fd,d-Z3d.e+fd/d0Z4d#d$d1e*ddfd2d3Z5d#d$d4e+d5e6d1e*ddf
d6d7Z7d4e+d8e)d5e6de,e* fd9d:Z8d;e,e+ d<e*de0fd=d>Z9d#d$d?e,e* d5e6de,e+ fd@dAZ:dS )Dz.Git LFS related type definitions and utilities    N)	dataclass)ceil)getsize)TYPE_CHECKINGBinaryIOIterableOptional	TypedDict)unquote)	constants   )build_hf_headersfix_hf_endpoint_in_urlget_sessionhf_raise_for_statushttp_backoffloggingvalidate_hf_hub_args)SliceFileObj)sha256sha_fileobj)CommitOperationAddz^[0-9a-f]{40}$zlfs-multipart-uploadzapplication/vnd.git-lfs+json)AcceptzContent-Typec                   @   s`   e Zd ZU dZeed< eed< eed< edefddZ	edefd	d
Z
edefddZdS )
UploadInfoad  
    Dataclass holding required information to determine whether a blob
    should be uploaded to the hub using the LFS protocol or the regular protocol

    Args:
        sha256 (`bytes`):
            SHA256 hash of the blob
        size (`int`):
            Size in bytes of the blob
        sample (`bytes`):
            First 512 bytes of the blob
    r   sizesamplepathc                 C   s\   t |}t|d}|dd d }t|}W d    n1 s"w   Y  | |||dS )Nrb   r   r   r   )r   ioopenpeekr   )clsr   r   filer   sha r&   Z/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/huggingface_hub/lfs.py	from_pathJ   s   
zUploadInfo.from_pathdatac                 C   s&   t | }| t||d d |dS )Nr   )r   r   r   )r   digestlen)r#   r)   r%   r&   r&   r'   
from_bytesR   s   zUploadInfo.from_bytesfileobjc                 C   sD   | d}|dtj t|}| }|dtj | |||dS )Nr   r   r   )readseekr    SEEK_SETr   tell)r#   r-   r   r%   r   r&   r&   r'   from_fileobjW   s   
zUploadInfo.from_fileobjN)__name__
__module____qualname____doc__bytes__annotations__intclassmethodstrr(   r,   r   r2   r&   r&   r&   r'   r   7   s   
 r   upload_infostoken	repo_typerepo_idrevisionendpointheaders	transfersreturnc                 C   s  |dur|nt j}d}|t jv rt j| }| d| | d}	d|dur&|nddgdd	 | D d
d}
|dur?dt|i|
d< i tt|d|pJi }t j|	||
d}t| |	 }|
dd}t|tsltd|
d}t|trx|nd}dd	 |D dd	 |D |fS )a  
    Requests the LFS batch endpoint to retrieve upload instructions

    Learn more: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md

    Args:
        upload_infos (`Iterable` of `UploadInfo`):
            `UploadInfo` for the files that are being uploaded, typically obtained
            from `CommitOperationAdd.upload_info`
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
        revision (`str`, *optional*):
            The git revision to upload to.
        headers (`dict`, *optional*):
            Additional headers to include in the request
        transfers (`list`, *optional*):
            List of transfer methods to use. Defaults to ["basic", "multipart"].

    Returns:
        `LfsBatchInfo`: 3-tuple:
            - First element is the list of upload instructions from the server
            - Second element is a list of errors, if any
            - Third element is the chosen transfer adapter if provided by the server (e.g. "basic", "multipart", "xet")

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If an argument is invalid or the server response is malformed.
        [`HfHubHTTPError`]
            If the server returned an error.
    N /z.git/info/lfs/objects/batchuploadbasic	multipartc                 S   s   g | ]}|j  |jd qS )oidr   )r   hexr   ).0rG   r&   r&   r'   
<listcomp>   s    z'post_lfs_batch_info.<locals>.<listcomp>r   )	operationrC   objectsZ	hash_algonameref)r=   rB   jsonrP   zMalformed response from serverZtransferc                 S   s   g | ]
}d |vrt |qS error)_validate_batch_actionsrM   objr&   r&   r'   rN          c                 S   s   g | ]
}d |v rt |qS rU   )_validate_batch_errorrX   r&   r&   r'   rN      rZ   )r   ZENDPOINTZREPO_TYPES_URL_PREFIXESr
   LFS_HEADERSr   r   postr   rT   get
isinstancelist
ValueErrorr;   )r<   r=   r>   r?   r@   rA   rB   rC   Z
url_prefixZ	batch_urlpayloadrespZ
batch_inforP   Zchosen_transferr&   r&   r'   post_lfs_batch_infoa   sB   ,



rd   c                   @   s   e Zd ZU eed< eed< dS )PayloadPartT
partNumberetagN)r3   r4   r5   r9   r8   r;   r&   r&   r&   r'   re      s   
 re   c                   @   s&   e Zd ZU dZeed< ee ed< dS )CompletionPayloadTz?Payload that will be sent to the Hub when uploading multi-part.rK   partsN)r3   r4   r5   r6   r;   r8   r`   re   r&   r&   r&   r'   rh      s   
 rh   rO   r   lfs_batch_actionc              	   C   sH  t | |d}|du rtd| j d dS |d d }t| |d d}|dur2t| |di }|d}	t|d	 |d
}
|	durjzt|	}	W n tt	fy`   td|	 dw t
| ||	|
d nt| |
d |durt| t|d	 |}t j|t||d| jj | jjdd}t| t| j d dS )a9  
    Handles uploading a given object to the Hub with the LFS protocol.

    Can be a No-op if the content of the file is already present on the hub large file storage.

    Args:
        operation (`CommitOperationAdd`):
            The add operation triggering this upload.
        lfs_batch_action (`dict`):
            Upload instructions from the LFS batch endpoint for this object. See [`~utils.lfs.post_lfs_batch_info`] for
            more details.
        headers (`dict`, *optional*):
            Headers to include in the request, including authentication and user agent headers.

    Raises:
        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
            If `lfs_batch_action` is improperly formatted
        [`HfHubHTTPError`]
            If the upload resulted in an error
    actionsNzContent of file z. is already present upstream - skipping uploadrG   verifyheader
chunk_sizehref)rA   zTMalformed response from LFS batch endpoint: `chunk_size` should be an integer. Got 'z'.)rO   rm   rn   
upload_url)rO   rp   )r=   rB   rJ   rS   z: Upload successful)rW   r^   loggerdebugZpath_in_repo_validate_lfs_actionr   r9   ra   	TypeError_upload_multi_part_upload_single_partr   r]   r   upload_infor   rL   r   r   )rO   rj   r=   rB   rA   rk   upload_actionverify_actionrm   rn   rp   Z
verify_urlZverify_respr&   r&   r'   
lfs_upload   sB   



rz   
lfs_actionc                 C   s:   t | dtr| ddu st | dtstd| S ).validates response from the LFS batch endpointro   rm   Nz"lfs_action is improperly formatted)r_   r^   r;   dictra   )r{   r&   r&   r'   rs   
  s
   rs   lfs_batch_actionsc                 C   sp   t | dtrt | dtstd| di d}| di d}|dur.t| |dur6t| | S )r|   rK   r   z)lfs_batch_actions is improperly formattedrk   rG   rl   N)r_   r^   r;   r9   ra   rs   )r~   rx   ry   r&   r&   r'   rW     s    rW   lfs_batch_errorc                 C   sh   t | dtrt | dtstd| d}t |tr.t |dtr.t |dts2td| S )r|   rK   r   z'lfs_batch_error is improperly formattedrV   messagecode)r_   r^   r;   r9   ra   r}   )r   
error_infor&   r&   r'   r[   "  s    
r[   rp   c                 C   sH   | j dd}td||d}t| W d   dS 1 sw   Y  dS )aK  
    Uploads `fileobj` as a single PUT HTTP request (basic LFS transfer protocol)

    Args:
        upload_url (`str`):
            The URL to PUT the file to.
        fileobj:
            The file-like object holding the data to upload.

    Raises:
        [`HfHubHTTPError`]
            If the upload resulted in an error.
    TZ	with_tqdmPUTr)   N)as_filer   r   )rO   rp   r-   responser&   r&   r'   rv   0  s   
"rv   rm   rn   c                 C   sJ   t || j|d}t| ||d}t j|t|| jj td}t	| dS )z@
    Uploads file using HF multipart LFS transfer protocol.
    )rm   rw   rn   )rO   sorted_parts_urlsrn   )rT   rB   N)
_get_sorted_parts_urlsrw   _upload_parts_iterativelyr   r]   _get_completion_payloadr   rL   r\   r   )rO   rm   rn   rp   r   response_headersZcompletion_resr&   r&   r'   ru   D  s   ru   rw   c                 C   sN   dd t dd |  D dd dD }t|}|t|j| kr%td|S )Nc                 S   s   g | ]\}}|qS r&   r&   )rM   _rp   r&   r&   r'   rN   Z  s    z*_get_sorted_parts_urls.<locals>.<listcomp>c                 S   s2   g | ]\}}|  rt|d krt|d|fqS )r   
   )isdigitr+   r9   )rM   Zpart_numrp   r&   r&   r'   rN   ]  s    c                 S   s   | d S )Nr   r&   )tr&   r&   r'   <lambda>b  s    z(_get_sorted_parts_urls.<locals>.<lambda>)keyz0Invalid server response to upload large LFS file)sorteditemsr+   r   r   ra   )rm   rw   rn   Zsorted_part_upload_urlsZ	num_partsr&   r&   r'   r   Y  s   r   r   rK   c                 C   sf   g }t | D ]'\}}|d}|d u s|dkr#td| d|d  ||d |d q||dS )Nrg   rE   zInvalid etag (`z`) returned for part r   )rf   rg   )rK   ri   )	enumerater^   ra   append)r   rK   ri   Zpart_numberrm   rg   r&   r&   r'   r   k  s   

r   r   c           	   
   C   s   g }| j dd<}t|D ].\}}t||| |d}td||d}t| ||j W d    n1 s6w   Y  qW d    |S 1 sGw   Y  |S )NTr   )Z	seek_fromZ
read_limitr   r   )r   r   r   r   r   r   rB   )	rO   r   rn   rB   r-   Zpart_idxZpart_upload_urlZfileobj_sliceZpart_upload_resr&   r&   r'   r   z  s&   
r   )NNNN)NNN);r6   r    redataclassesr   mathr   Zos.pathr   typingr   r   r   r   r	   urllib.parser
   Zhuggingface_hubr   utilsr   r   r   r   r   r   r   Z
utils._lfsr   Z	utils.shar   r   Z_commit_apir   Z
get_loggerr3   rq   compileZ	OID_REGEXZLFS_MULTIPART_UPLOAD_COMMANDr\   r   r;   r}   r`   tuplerd   re   rh   rz   rs   rW   r[   rv   r9   ru   r   r   r   r&   r&   r&   r'   <module>   s   $	

)
	V

F
