o
    0 i8*                     @  s   d dl mZ d dlmZ d dlmZ d dlZd dlZd$d
dZd%d&ddZ	d'ddZ
d(d)ddZeG d d! d!ZG d"d# d#ZdS )*    )annotations)	dataclass)LiteralNfilenamestroutput_type
str | Nonereturnc                 C  s<   |d ur|  nd}|d ur| nd}| j|||||dS )N )ZouttypeZftypeZOUTTYPEZFTYPE)lowerupperformat)r   r   Zftype_lowercaseZftype_uppercase r   X/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/gguf/utility.pyfill_templated_filename
   s   r      model_params_countint
min_digitsc                 C  s   | dkr| d }d}n| dkr| d }d}n| dkr!| d }d	}n| d
 }d}t |ttt|d d}|d| d| S )Ng   mBg-q=Tg    eAg&.>Bg    .Agư>MgMbP?K0r   .f)maxlenr   roundlstrip)r   r   Zscaled_model_paramsZscale_suffixZfixr   r   r   #model_weight_count_rounded_notation   s    r    total_paramsshared_paramsexpert_paramsexpert_countc                 C  sF   |dkrt t|t| dd}| d| }|S t t| dd}|S )Nr   r   )r   x)r    abs)r!   r"   r#   r$   Zpretty_sizeZ
size_classr   r   r   
size_label*   s   r'   
model_name	base_namefinetune_stringversion_string
model_typeLiteral['vocab', 'LoRA'] | Nonec                 C  s  |d ur|  dddd}n| d ur"|   dddd}nd}|d ur-d| nd}|d ur>d|  dd nd}	|d urOd|  dd nd}
|d urbd|  dd  nd}|d ursd|  dd nd}| | |	 |
 | | S )N -/z
ggml-modelr
   )stripreplacer   )r(   r)   r*   r+   r'   r   r,   name
parametersZfinetuneversionencodingkindr   r   r   naming_convention5   s   ""&"r8   c                   @  s@   e Zd ZU ded< ded< ded< ded< ded< dddZdS )RemoteTensorr   dtypeztuple[int, ...]shaper   offset_startsizeurlr	   	bytearrayc                 C  s   t tj| j| j| jd}|S )N)r>   startr=   )r?   SafetensorRemoteget_data_by_ranger>   r<   r=   )selfdatar   r   r   rD   T   s   zRemoteTensor.dataN)r	   r?   )__name__
__module____qualname____annotations__rD   r   r   r   r   r9   L   s   
 r9   c                   @  sn   e Zd ZdZdZdZeddd	ZedddZed ddZ	ed!d"ddZ
ed#ddZed$ddZdS )%rA   a  
    Uility class to handle remote safetensor files.
    This class is designed to work with Hugging Face model repositories.

    Example (one model has single safetensor file, the other has multiple):
        for model_id in ["ngxson/TEST-Tiny-Llama4", "Qwen/Qwen2.5-7B-Instruct"]:
            tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
            print(tensors)

    Example reading tensor data:
        tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
        for name, meta in tensors.items():
            dtype, shape, offset_start, size, remote_safetensor_url = meta
            # read the tensor data
            data = SafetensorRemote.get_data_by_range(remote_safetensor_url, offset_start, size)
            print(data)
    zhttps://huggingface.co   model_idr   r	   dict[str, RemoteTensor]c                 C  s  |  | j d| d}|r| j d| d}| |S | j d| d}|  |}|r{| |d}|d}t|}|ddusGJ d|d }	tt	|	
 }
|
  i }|
D ]}| j d| d	| }| | D ]\}}|||< qoq[|S td
| d)z
        Get list of tensors from a Hugging Face model repository.

        Returns a dictionary of tensor names and their metadata.
        Each tensor is represented as a tuple of (dtype, shape, offset_start, size, remote_safetensor_url)
        r0   z/resolve/main/model.safetensorsz*/resolve/main/model.safetensors.index.jsonr   utf-8
weight_mapNz"weight_map not found in index filez/resolve/main/zModel z# does not have any safetensor files)check_file_existBASE_DOMAINget_list_tensorsrB   decodejsonloadsgetlistsetvaluessortitems
ValueError)clsrJ   Zis_single_filer>   	index_urlZis_multiple_filesZ
index_dataZ	index_strZ
index_jsonrM   	all_filesZtensorsfilekeyvalr   r   r   get_list_tensors_hf_modelq   s,   	




z*SafetensorRemote.get_list_tensors_hf_modelr>   c                 C  s   |  |\}}i }| D ]V\}}|dkrqt|ts%td| d| z%|d }|d }|d \}	}
|
|	 }||	 }t|t||||d||< W q tyc } ztd| d| d	| d
}~ww |S )z
        Get list of tensors from a remote safetensor file.

        Returns a dictionary of tensor names and their metadata.
        Each tensor is represented as a tuple of (dtype, shape, offset_start, size)
        __metadata__zInvalid metadata for tensor 'z': r:   r;   Zdata_offsets)r:   r;   r<   r=   r>   z$Missing key in metadata for tensor 'z	, meta = N)get_metadatarY   
isinstancedictrZ   r9   tupleKeyError)r[   r>   metadatadata_start_offsetresr3   metar:   r;   Zoffset_start_relativeZoffset_end_relativer=   r<   er   r   r   rP      s&   
z!SafetensorRemote.get_list_tensorstuple[dict, int]c              
   C  s   d}|  |d|}t|dk rtdtj|dd dd}d| }tj}|| dkr3||||  7 }t|d| k rItdd|  d	t| |dd|  }|d
}z
t	|}	|	|fW S  tj
yt }
 ztd|
 d}
~
ww )zz
        Get JSON metadata from a remote safetensor file.

        Returns tuple of (metadata, data_start_offset)
        i  P r   rI   z%Not enough data to read metadata sizeNlittle)	byteorderz'Could not read complete metadata. Need z bytes, got rL   z-Failed to parse safetensor metadata as JSON: )rB   r   rZ   r   
from_bytesrA   	ALIGNMENTrQ   rR   rS   JSONDecodeError)r[   r>   	read_sizeraw_dataZmetadata_lengthri   Z	alignmentZmetadata_bytesZmetadata_strrh   rl   r   r   r   rc      s(   


zSafetensorRemote.get_metadatar@   r   r=   bytesc           	      C  s   ddl }ddlm} ||}|jr|jstd| |  }|dkr/d| d||  |d< |j|d	|d
}|  |j	t
|dkrF| S d S )z
        Get raw byte data from a remote file by range.
        If size is not specified, it will read the entire file.
        r   NurlparseInvalid URL: ru   zbytes=r/   RangeTallow_redirectsheaders)requestsurllib.parserx   schemenetlocrZ   _get_request_headersrT   raise_for_statuscontentslice)	r[   r>   r@   r=   r~   rx   
parsed_urlr}   responser   r   r   rB      s   z"SafetensorRemote.get_data_by_rangeboolc                 C  s   ddl }ddlm} ||}|jr|jstd| z|  }d|d< |j|d|d}d	|j  ko7d
k W S   W S  |j	yE   Y dS w )zt
        Check if a file exists at the given URL.
        Returns True if the file exists, False otherwise.
        r   Nrw   ry   z	bytes=0-0rz   Tr{      i  F)
r~   r   rx   r   r   rZ   r   headstatus_codeRequestException)r[   r>   r~   rx   r   r}   r   r   r   r   rN      s   z!SafetensorRemote.check_file_existdict[str, str]c                 C  s,   ddi}t jdrdt jd  |d< |S )z$Prepare common headers for requests.z
User-AgentZconvert_hf_to_ggufZHF_TOKENzBearer Authorization)osenvironrT   )r[   r}   r   r   r   r     s   z%SafetensorRemote._get_request_headersN)rJ   r   r	   rK   )r>   r   r	   rK   )r>   r   r	   rm   )ru   )r>   r   r@   r   r=   r   r	   rv   )r>   r   r	   r   )r	   r   )rE   rF   rG   __doc__rO   rq   classmethodra   rP   rc   rB   rN   r   r   r   r   r   rA   [   s     $#rA   )r   r   r   r   r	   r   )r   )r   r   r   r   r	   r   )
r!   r   r"   r   r#   r   r$   r   r	   r   )N)r(   r   r)   r   r*   r   r+   r   r'   r   r   r   r,   r-   r	   r   )
__future__r   dataclassesr   typingr   r   rR   r   r    r'   r8   r9   rA   r   r   r   r   <module>   s    
	
