o
    Ó0 i‚  ã                   @  sŒ   d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
mZ d dlmZ ddlmZ d dlZe d¡ZeG d	d
„ d
ƒƒZdS )é    )ÚannotationsN)ÚPath)ÚAnyÚLiteralÚOptional)Ú	dataclassé   )ÚKeysÚmetadatac                   @  s¤  e Zd ZU dZded< dZded< dZded< dZded< dZded< dZ	ded< dZ
ded	< dZded
< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< ed>d?d%d&„ƒZed@dAd(d)„ƒZed@dBd*d+„ƒZed@dBd,d-„ƒZ ed.d/„ ƒZ!edCdDd2d3„ƒZ"ed>dEd8d9„ƒZ#dFd<d=„Z$dS )GÚMetadataNúOptional[str]ÚnameÚauthorÚversionÚorganizationÚfinetuneÚbasenameÚdescriptionÚquantized_byÚ
size_labelÚurlÚdoiÚuuidÚrepo_urlÚ
source_urlÚ
source_doiÚsource_uuidÚsource_repo_urlÚlicenseÚlicense_nameÚlicense_linkzOptional[list[dict]]Úbase_modelszOptional[list[str]]ÚtagsÚ	languagesÚdatasetsr   Úmetadata_override_pathúOptional[Path]Ú
model_pathÚ
model_nameÚtotal_paramsÚintÚreturnc                 C  s  t ƒ }t  |¡}t  |¡}t  |||||¡}t  | ¡}| tjj|j	¡|_	| tjj
|j¡|_| tjj|j¡|_| tjj|j¡|_| tjj|j¡|_| tjj|j¡|_| tjj|j¡|_| tjj|j¡|_| tjj|j¡|_| tjj|j¡|_| tjj|j¡|_| tjj|j¡|_| tjj |j!¡|_!| tjj"|j#¡|_#| tjj$|j%¡|_%| tjj&|j'¡|_'| tjj(|j)¡|_)| tjj*|j+¡|_+| tjj,|j-¡|_-| d|j.¡|_.| d|j/¡|_/| tjj0|j1¡|_1| tjj2|j3¡|_3|d ur||_	|S )Nzgeneral.base_modelszgeneral.datasets)4r   Úload_model_cardÚload_hf_parametersÚapply_metadata_heuristicÚload_metadata_overrideÚgetr	   ZGeneralÚNAMEr   ZAUTHORr   ÚVERSIONr   ZORGANIZATIONr   ZFINETUNEr   ZBASENAMEr   ZDESCRIPTIONr   ZQUANTIZED_BYr   Z
SIZE_LABELr   ZLICENSE_NAMEr   ZLICENSE_LINKr    ÚURLr   ZDOIr   ÚUUIDr   ZREPO_URLr   Z
SOURCE_URLr   Z
SOURCE_DOIr   ZSOURCE_UUIDr   ZSOURCE_REPO_URLr   r!   r$   ZTAGSr"   Z	LANGUAGESr#   )r%   r'   r(   r)   r
   Ú
model_cardÚ	hf_paramsZmetadata_override© r7   úY/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/gguf/metadata.pyÚload.   s>   



zMetadata.loadúdict[str, Any]c                 C  sP   | d u s|   ¡ s
i S t| ddd}t |¡W  d   ƒ S 1 s!w   Y  d S )NÚrúutf-8©Úencoding)Úis_fileÚopenÚjsonr9   )r%   Úfr7   r7   r8   r/   j   s
   $ÿzMetadata.load_metadata_overridec           	      C  s2  | d u s|   ¡ s
i S | d }| ¡ si S d}t|dddQ}| ¡ }| ¡ }g }t|ƒdkr7i W  d   ƒ S t|ƒdkrL|d dkrLi W  d   ƒ S |dd … D ]}|dkrZ n| |¡ qRd	 |¡d	 }W d   ƒ n1 sqw   Y  | d
d¡}|r—t	 
|¡}t|tƒrŠ|S t dt|ƒ› d¡ i S i S )Nz	README.mdÚ r;   r<   r=   r   z---r   Ú
z- no
z- "no"
z3while reading YAML model card frontmatter, data is z instead of dict)Úis_dirr?   r@   ÚreadÚ
splitlinesÚlenÚappendÚjoinÚreplaceÚyamlZ	safe_loadÚ
isinstanceÚdictÚloggerÚerrorÚtype)	r'   Zmodel_card_pathZyaml_contentrB   ÚcontentÚlinesZ
lines_yamlÚlineÚdatar7   r7   r8   r,   r   s<   ú÷
ñ

zMetadata.load_model_cardc                 C  sd   | d u s|   ¡ s
i S | d }| ¡ si S t|ddd}t |¡W  d   ƒ S 1 s+w   Y  d S )Nzconfig.jsonr;   r<   r=   )rE   r?   r@   rA   r9   )r'   Zconfig_pathrB   r7   r7   r8   r-   ž   s   $ÿzMetadata.load_hf_parametersc                 C  s$   d  dd„ |  ¡  dd¡ ¡ D ƒ¡S )Nú c                 S  s,   g | ]}|  ¡ rt d |¡s| ¡ n|‘qS )z^(v\d+(?:\.\d+)*|\d.*)$)ÚislowerÚreÚmatchÚtitle)Ú.0Úwr7   r7   r8   Ú
<listcomp>®   s   , z(Metadata.id_to_title.<locals>.<listcomp>ú-)rJ   ÚstriprK   Úsplit)Ústringr7   r7   r8   Úid_to_title«   s   $zMetadata.id_to_titleÚmodel_idúMtuple[str | None, str | None, str | None, str | None, str | None, str | None]c              	   C  s”  | d u rdS d| v r| d d d d d fS d| v r|   dd¡\}}nd | }}|d ur6t|ƒdkr6|d dkr6d }|  d¡}ttt|ƒƒƒD ]}t|| ƒdkrP||= qCdd	„ |D ƒ}t|ƒD ]\}}t d
|tj¡rq||  d¡ q\t d|tj¡r‡||  d¡ | 	¡ ||< q\|dkr:t d|tj¡r:| 
dd¡}|d  ¡ r±|d d… d |d  |d  }t|ƒdkrÏ|d  ¡ rÏ|d dv rÏ|d d… |d  	¡  }|dkr%zFt|d d… ƒtdd |d ¡ƒ }|dk rô|t|ƒd k s|dkrt|| ƒd| d kr||  d¡ |d d… |d  ¡  }W n
 ty$   Y nw t|| ƒdkr5||  d¡ |||< q\|dkrct d|tj¡rc|dk r\| ¡ dkr\||  d¡ q\||  d¡ q\tdd„ t||ƒD ƒƒrt||ƒD ]\}	}
d|
v rŽtdd„ |	D ƒƒrŽ|
 d¡ qvd}t||ƒD ]3\}}
|r¸t|
ƒdkr¬|d  ¡ s±d|
v r¸|
 d¡ q—|r½d }t|
ƒdkrÉ|
 d¡ q—tt|ƒt|ƒƒD ]\}}
d|
v rët|
ƒdkrë|
 d¡ qÔ d d!d„ t||ƒD ƒ¡pûd }d t d"d„ t||ƒD ƒ¡ ¡ ¡pd }d d#d„ t||ƒD ƒ¡p d }d d$d„ t||ƒD ƒ¡p0d }|d u rB|d u rB|d u rBd }||||||fS )%N)NNNNNNrV   ú/r   r   Ú.r^   c                 S  s   g | ]}t ƒ ‘qS r7   )Úset)r[   Ú_r7   r7   r8   r]   Ó   s    z4Metadata.get_model_id_components.<locals>.<listcomp>z(v|iter)?\d+([.]\d+)*r   zi?q\d(_\w)*|b?fp?(16|32)rQ   zD(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)rh   éÿÿÿÿéþÿÿÿZkmbtiè  z KMBTé   é   r   r   zchat|instruct|vision|loraZlorac                 s  s.    | ]\}}d |v r|D ]}|  ¡ V  qqdS ©r   N)Ú	isdecimal)r[   ÚnÚtÚcr7   r7   r8   Ú	<genexpr>  s   €, z3Metadata.get_model_id_components.<locals>.<genexpr>c                 s  s    | ]}|  ¡ V  qd S ©N)Úisalpha)r[   rq   r7   r7   r8   rr   
  s   € Tr   Fc                 s  ó     | ]\}}d |v r|V  qdS )r   Nr7   )r[   ro   rp   r7   r7   r8   rr     ó   € c                 s  ru   rm   r7   )r[   Úsrp   r7   r7   r8   rr   !  rv   c                 s  ru   )r   Nr7   )r[   rB   rp   r7   r7   r8   rr   "  rv   c                 s  s(    | ]\}}d |v rd|vr|V  qdS )r   r   Nr7   )r[   Úvrp   r7   r7   r8   rr   %  s   €& )r`   rH   ÚreversedÚrangeÚ	enumeraterX   Ú	fullmatchÚ
IGNORECASEÚaddÚupperrK   rn   ÚfloatÚpowÚfindÚabsÚlowerÚ
ValueErrorÚanyÚzipÚallÚremovert   rJ   rN   ÚfromkeysÚkeys)rc   r)   Úorg_componentÚmodel_full_name_componentZ
name_partsÚiZ
name_typesÚpartZlabel_paramsro   rp   Zat_startr   r   r   r   r7   r7   r8   Úget_model_id_components°   s”   
 
€þ 
$$€þ
€

€,
€ *  z Metadata.get_model_id_componentsr
   r5   úOptional[dict]r6   c              
     sF  ˆd ur´d@‡ ‡fdd„}d@‡ ‡fdd„}|ddƒ |d	d	ƒ |d
d
ƒ |ddƒ |ddƒ |ddƒ |ddƒ |ddƒ |ddƒ |ddƒ |ddƒ |ddƒ |ddƒ |d	dƒ |d
dƒ |ddƒ |ddƒ |ddƒ |ddƒ |ddƒ |dd ƒ |dd!ƒ |dd"ƒ |dd#ƒ |ddƒ |d	d$ƒ |dd%ƒ d&ˆv s©d'ˆv s©d(ˆv r˜g }ˆ  d&ˆ  d'ˆ  d(d ¡¡¡}|d urÒt|tƒrÈ| |¡ n
t|tƒrÒ| |¡ ˆ jd u rÚg ˆ _|D ]»}	i }
t|	tƒr}|	 d)¡sö|	 d*¡sö|	 d+¡r;|	|
d< d,|	v r:t 	d-|	¡}|r:| 
d.¡}t ||¡\}}}}}}|d ur%t |¡|
d< |d ur1t |¡|
d< |d ur:||
d
< nVt |	|¡\}}}}}}|d urSt |¡|
d< |d ur_t |¡|
d< |d urh||
d
< |d ur||d ur|d/|› d0|› |
d< nt|	tƒr†|	}
nt d1t|	ƒ› d2¡ ˆ j |
¡ qÜd3ˆv s§d4ˆv s§d5ˆv r‘g }ˆ  d3ˆ  d4ˆ  d5d ¡¡¡}|d urÓt|tƒrÈ| |¡ nt|tƒrÓ| |¡ ˆ jd u rÜg ˆ _|D ]²}i }t|tƒru| d6¡r3||d< d,|v r2t 	d-|¡}|r2| 
d.¡}t ||¡\}}}}}}|d urt |¡|d< |d ur)t |¡|d< |d ur2||d
< nVt ||¡\}}}}}}|d urKt |¡|d< |d urWt |¡|d< |d ur`||d
< |d urt|d urtd/|› d0|› |d< nt|tƒr~|}nt d7t|ƒ› d2¡ ˆ j |¡ qÞ|d8d8ƒ |d9d9ƒ |d:d:ƒ |d;d;ƒ |d;d<ƒ |d=d=ƒ |d=d>ƒ |d ur3|  d?¡}|d ur3| d0¡d.kr3|}	t |	|¡\}}}}}}ˆ jd u rê|d urêt |¡ˆ _ˆ jd u rû|d urût |¡ˆ _ˆ jd u r	|d ur	|ˆ _ˆ jd u r|d ur|ˆ _ˆ jd u r%|d ur%|ˆ _ˆ jd u r3|d ur3|ˆ _|d ur¡|j}	t |	|¡\}}}}}}ˆ jd u rX|d urXt |¡ˆ _ˆ jd u ri|d urit |¡ˆ _ˆ jd u rw|d urw|ˆ _ˆ jd u r…|d ur…|ˆ _ˆ jd u r“|d ur“|ˆ _ˆ jd u r¡|d ur¡|ˆ _ˆ S )ANÚmetadata_keyÚstrÚmodel_card_keyc                   s6   |ˆv rt ˆ | d ƒd u rtˆ | ˆ |¡ƒ d S d S d S rs   )ÚgetattrÚsetattrr0   )r’   r”   ©r
   r5   r7   r8   Úuse_model_card_metadata5  s   ÿzBMetadata.apply_metadata_heuristic.<locals>.use_model_card_metadatac                   sj   ˆ  |d ¡}|d u rd S tˆ | d ƒ}|d u rg }t|tƒr#| |¡ n
t|tƒr-| |¡ tˆ | |ƒ d S rs   )r0   r•   rM   r“   rI   ÚlistÚextendr–   )r’   r”   Z
tags_valueÚcurrent_valuer—   r7   r8   Úuse_array_model_card_metadata9  s   


zHMetadata.apply_metadata_heuristic.<locals>.use_array_model_card_metadatar   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r(   Zmodel_authorZmodel_versionZmodel_organizationZmodel_descriptionZmodel_finetuneZmodel_basenameZmodel_size_labelZ	model_urlZ	model_doiZ
model_uuidZmodel_repo_urlZmodel_creatorZ
model_typeÚ
base_modelr!   Zbase_model_sourcesúhttp://úhttps://ússh://zhuggingface.coz&https?://huggingface.co/([^/]+/[^/]+)$r   zhttps://huggingface.co/re   zbase model entry 'z' not in a known formatr$   ÚdatasetZdataset_sources)rž   rŸ   r    zdataset entry 'r   r   r    r"   Zpipeline_tagr#   ÚlanguageZ_name_or_path)r’   r“   r”   r“   )r0   rM   r“   rI   r™   rš   r!   Ú
startswithrX   rY   Úgroupr   r   rb   rN   rO   rP   r$   Úcountr   r   r   r   r   r   )r
   r5   r6   r'   r)   r˜   rœ   Zmetadata_base_modelsZbase_model_valuerc   r   rY   Zmodel_id_componentr   rŒ   r   r   r   r   Zmetadata_datasetsZdataset_valueZ
dataset_idr¡   Zdataset_id_componentZdataset_name_componentZhf_name_or_pathr7   r—   r8   r.   -  s0  































 




€


€






€


€









z!Metadata.apply_metadata_heuristicÚgguf_writerúgguf.GGUFWriterc                 C  sL  | j d usJ ‚| | j ¡ | jd ur| | j¡ | jd ur#| | j¡ | jd ur.| | j¡ | jd ur9| 	| j¡ | j
d urD| | j
¡ | jd urO| | j¡ | jd urZ| | j¡ | jd ure| | j¡ | jd ur€t| jtƒrz| d | j¡¡ n| | j¡ | jd ur‹| | j¡ | jd ur–| | j¡ | jd ur¡| | j¡ | jd ur¬| | j¡ | jd ur·|  | j¡ | j!d urÂ| "| j!¡ | j#d urÍ| $| j#¡ | j%d urØ| &| j%¡ | j'd urã| (| j'¡ | j)d urî| *| j)¡ | j+d ur|| ,t-| j+ƒ¡ t.| j+ƒD ]z\}}d|v r| /||d ¡ d|v r| 0||d ¡ d|v r,| 1||d ¡ d|v r9| 2||d ¡ d|v rF| 3||d ¡ d|v rS| 4||d ¡ d|v r`| 5||d ¡ d	|v rm| 6||d	 ¡ d
|v rz| 7||d
 ¡ q| j8d ur
| 9t-| j8ƒ¡ t.| j8ƒD ]z\}}d|v r | :||d ¡ d|v r­| ;||d ¡ d|v rº| <||d ¡ d|v rÇ| =||d ¡ d|v rÔ| >||d ¡ d|v rá| ?||d ¡ d|v rî| @||d ¡ d	|v rû| A||d	 ¡ d
|v r| B||d
 ¡ q| jCd ur| D| jC¡ | jEd ur$| F| jE¡ d S d S )Nú,r   r   r   r   r   r   r   r   r   )Gr   Zadd_namer   Z
add_authorr   Zadd_versionr   Zadd_organizationr   Zadd_finetuner   Zadd_basenamer   Zadd_descriptionr   Zadd_quantized_byr   Zadd_size_labelr   rM   r™   Zadd_licenserJ   r   Zadd_license_namer    Zadd_license_linkr   Zadd_urlr   Zadd_doir   Zadd_uuidr   Zadd_repo_urlr   Zadd_source_urlr   Zadd_source_doir   Zadd_source_uuidr   Zadd_source_repo_urlr!   Zadd_base_model_countrH   r{   Zadd_base_model_nameZadd_base_model_authorZadd_base_model_versionZadd_base_model_organizationZadd_base_model_descriptionZadd_base_model_urlZadd_base_model_doiZadd_base_model_uuidZadd_base_model_repo_urlr$   Zadd_dataset_countZadd_dataset_nameZadd_dataset_authorZadd_dataset_versionZadd_dataset_organizationZadd_dataset_descriptionZadd_dataset_urlZadd_dataset_doiZadd_dataset_uuidZadd_dataset_repo_urlr"   Zadd_tagsr#   Zadd_languages)Úselfr¦   ÚkeyZbase_model_entryZdataset_entryr7   r7   r8   Úset_gguf_meta_model  s¶   



























€








€ÿzMetadata.set_gguf_meta_model)NNNr   )
r%   r&   r'   r&   r(   r   r)   r*   r+   r   rs   )r%   r&   r+   r:   )r'   r&   r+   r:   )Nr   )rc   r   r)   r*   r+   rd   )r
   r   r5   r‘   r6   r‘   r'   r&   r)   r*   r+   r   )r¦   r§   )%Ú__name__Ú
__module__Ú__qualname__r   Ú__annotations__r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   Ústaticmethodr9   r/   r,   r-   rb   r   r.   r«   r7   r7   r7   r8   r      sR   
 ;+
| rr   )Ú
__future__r   rX   rA   rL   ÚloggingÚpathlibr   Útypingr   r   r   Údataclassesr   Ú	constantsr	   ZggufÚ	getLoggerrO   r   r7   r7   r7   r8   Ú<module>   s    
