o
    0 i'&                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m	Z	 d dl
mZmZmZ dejvrOeejjjjd  rOejd eeejjj d dlZedZG d	d
 d
eZd$ddZd%ddZd&dd Zd'd!d"Zed#kr}e  dS dS )(    )annotationsN)Path)tqdm)AnySequence
NamedTupleZNO_LOCAL_GGUFzgguf-pyzgguf-new-metadatac                   @  s6   e Zd ZU ded< ded< dZded< dZd	ed
< dS )MetadataDetailszgguf.GGUFValueTypetyper   value strdescriptionNzgguf.GGUFValueType | Nonesub_type)__name__
__module____qualname____annotations__r   r    r   r   j/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/gguf/scripts/gguf_new_metadata.pyr      s
   
 r   readergguf.GGUFReaderkeyr   returnr   c                 C  s   |  |}|r| S d S N)	get_fieldcontents)r   r   fieldr   r   r   get_field_data   s   
r   
token_listSequence[int]tokenc                   s6    fddt | D }t|dkrtd  d|S )Nc                   s   g | ]
\}}| kr|qS r   r   ).0indexr
   r    r   r   
<listcomp>%   s    zfind_token.<locals>.<listcomp>r   zUnable to find "z" in token list!)	enumeratelenLookupError)r   r    Z	token_idsr   r#   r   
find_token$   s   r(   writergguf.GGUFWriternew_metadatadict[str, MetadataDetails]remove_metadataSequence[str]Nonec                 C  sv  | j  D ]}|jtjjjks|jdrt	d|j  q|jtjj
jr9tjj
j|v r9t	d|j  q|j|v rHt	d|j  q|jd }|tjjkrX|jd nd }t|| |d}||j|}|j|v rt	d|j d	|j d
|j d|j  ||j= n|jd urt	d|j  |jd ur|j|j|j|j|jd u r|n|jd qtjj
j|v rt	d ||tjj
j j |tjj
j= | D ]\}	}t	d|	 d	|j d|j  ||	|j|j qd}
| jD ]}|
|j7 }
||j|jj|jj|jj|j  qt!d|
ddd}|"  |#  |$  | jD ]}|%|j |&|j q%|'  d S )NzGGUF.zSuppressing z	Skipping z	Removing r   )r   z
Modifying z: "z" -> "z" zCopying zAdding chat template(s)zAdding ZWritingbyteT)ZdesctotalunitZ
unit_scale)(fieldsvaluesnameggufKeysGeneralARCHITECTURE
startswithloggerdebug	TokenizerCHAT_TEMPLATEtypesGGUFValueTypeZARRAYr   r   getr
   r   Zadd_key_valuer	   r   Zadd_chat_templateitemsZtensorsZn_bytesZadd_tensor_infodatashapeZdtypenbytesZtensor_typer   Zwrite_header_to_fileZwrite_kv_data_to_fileZwrite_ti_data_to_fileZwrite_tensor_dataupdateclose)r   r)   r+   r-   r   val_typer   old_valvalr   total_bytesZtensorbarr   r   r   copy_with_new_metadata-   sR    


*


(
 

$
rN   c               
   C  s0  dd t jjj D } tdd | D }tjdd}|jdt	dd |jd	t	d
d |jdt
ddd |jdt
ddd |jdt
ddd |jdt	ddd |jdt
ddd |jddt
ddd |jd dt
d!d"d#| d$fd% |jd&dt
d'd"d#| d(fd% |jd)d*d+d, |jd-d*d.d, |ttjd"krd nd/g}tj|jrtjntjd0 i }|jpg }|jrtt jj|j|t jjj< |jrtt jj|j|t jjj< |jrtt jj|jd1rt !|jn|j|t jjj"< |j#r)t$|j#d2!}t %|}|&d3}|rtt jj||t jjj"< W d    n	1 s$w   Y  |j'r:tt jj|j'|t jjj(< |rht)*d4 t)*d5 t)*d6 |j+sht)*d7 t,d8}	|	d9krht)-d: t.d; t)-d<|j,  t /|j,d2}
t0|
t jjj1}t0|
t jjj2pg }|j3pg D ]M\}}||vrt)*d=| d> qt4||}tt jj5|d; d?| ||| < t|d@krt)*dA| dB|d;  dC t)*dDdEd |D  q|j6pg D ]K\}}||vrt)*d=| d> q|7 st8dF| dGt9|}|d;kr)|t|k r)tt jj5|d?||  ||| < qt8dH| dIt:j;<|j=rd|j+sdt)*d4 t)*dJ|j= dK t)*d7 t,d8}	|	d9krdt)-d: t.d; t)-dL|j=  t j>|j=||
j?dM}t0|
t jjj@}|d urt)AdN|  ||_BtC|
||| d S )ONc                 s  s(    | ]}| d sttjj|V  qdS )_N)r;   getattrr7   r8   r>   r!   nr   r   r   	<genexpr>h   s   & zmain.<locals>.<genexpr>c                 s  s:    | ]}| d r|dd dtd   |fV  qdS )Z	_token_id.r0   N)endswithsplitr&   rQ   r   r   r   rS   i   s   8 z,Make a copy of a GGUF file with new metadata)r   inputz GGUF format model input filename)r	   helpoutputz!GGUF format model output filenamez--general-namezThe models general.namez"name")r	   rX   metavarz--general-descriptionzThe models general.descriptionz"Description ..."z--chat-templatez:Chat template string (or JSON string containing templates)z"{% ... %} ..."z--chat-template-configz'Config file containing chat template(s)ztokenizer_config.jsonz--pre-tokenizerzThe models tokenizer.ggml.prez"pre tokenizer"z--remove-metadataappendz/Remove metadata (by key name) from output modelzgeneral.url)actionr	   rX   rZ   z--special-tokenzSpecial token by value   z | z	"<token>")r\   r	   rX   nargsrZ   z--special-token-by-idzSpecial token by id0z--force
store_truez$Bypass warnings without confirmation)r\   rX   z	--verbosezIncrease output verbosityz--help)level[rchat_templatez&*** Warning *** Warning *** Warning **z=* Most metadata is required for a fully functional GGUF file,z@* removing crucial metadata may result in a corrupt output file!z<* Enter exactly YES if you are positive you want to proceed:zYES, I am sure> YESz(You didn't enter YES. Okay then, see ya!r   z* Loading: zUnknown special token "z", ignoring...z=    z
Multiple "z" tokens found, choosing ID z0, use --special-token-by-id if you want another:z, c                 s  s    | ]}t |V  qd S r   )r   )r!   ir   r   r   rS      s    z
Token ID "z" is not a valid ID!z	Token ID z is not within token list!z* The "z3" GGUF file already exists, it will be overwritten!z* Writing: )arch	endianesszSetting custom alignment: )Dr7   r8   r>   __dict__keysdictargparseArgumentParseradd_argumentr   r   join
parse_argsr&   sysargvloggingbasicConfigverboseDEBUGINFOr-   Zgeneral_namer   rA   STRINGr9   NAMEZgeneral_descriptionZDESCRIPTIONrd   r;   jsonloadsr?   Zchat_template_configopenloadrB   Zpre_tokenizerZPREr<   warningforcerW   infoexitZ
GGUFReaderr   r:   LISTZspecial_tokenr(   ZUINT32Zspecial_token_by_id	isdecimalr'   intospathisfilerY   Z
GGUFWriterri   Z	ALIGNMENTr=   Zdata_alignmentrN   )Ztokenizer_metadataZtoken_namesparserargsr+   r-   fpconfigtemplateresponser   rh   r   r6   r    idsZ	id_stringZid_intr)   Z	alignmentr   r   r   maing   s   $$
2










"

&





r   __main__)r   r   r   r   r   r   )r   r   r    r   r   r   )
r   r   r)   r*   r+   r,   r-   r.   r   r/   )r   r/   )
__future__r   rt   rm   r   rr   r{   pathlibr   r   typingr   r   r   environ__file__parentexistsr   insertr   r7   	getLoggerr<   r   r   r(   rN   r   r   r   r   r   r   <module>   s*   "



	
:j
