o
    Wi                     @  sp   d dl mZ d dlZd dlZd dlZd!ddZd"ddZd#d$ddZ			d%d&ddZd'ddZ	d#d(dd Z
dS ))    )annotationsNblobpathstrreturnbytesc              
   C  s   d| vrt | ddd}| W  d    S 1 sw   Y  | dr4dd l}|| }|  |jS zdd l}W n tyK } ztd|d }~ww |	| d}| W  d    S 1 saw   Y  d S )Nz://rbr   	buffering)zhttp://zhttps://Oblobfile is not installed. Please install it by running `pip install blobfile`.)
openread
startswithrequestsgetraise_for_statuscontentblobfileImportErrorBlobFile)r   fr   respr   e r   D/home/app/Keep/.python/lib/python3.10/site-packages/tiktoken/load.py	read_file   s,    

$r   dataexpected_hashboolc                 C  s   t |  }||kS N)hashlibsha256	hexdigest)r   r   Zactual_hashr   r   r   
check_hash   s   r"   
str | Nonec                 C  s  d}dt jv rt jd }ndt jv rt jd }ndd l}t j| d}d}|dkr/t| S t| 	 
 }t j||}t j|r|t|ddd	}| }W d    n1 s[w   Y  |d u sit||rk|S zt | W n	 ty{   Y nw t| }	|rt|	|std
|  d| ddd l}
z6t j|dd |d t|
  d }t|d}||	 W d    n1 sw   Y  t || W |	S  ty   |rւ Y |	S w )NTZTIKTOKEN_CACHE_DIRZDATA_GYM_CACHE_DIRr   zdata-gym-cacheF r   r   z'Hash mismatch for data downloaded from z (expected z<). This may indicate a corrupted download. Please try again.)exist_ok.z.tmpwb)osenvirontempfilepathjoin
gettempdirr   r   sha1encoder!   existsr   r   r"   removeOSError
ValueErroruuidmakedirsr   uuid4writerename)r   r   Zuser_specified_cache	cache_dirr*   	cache_key
cache_pathr   r   contentsr4   Ztmp_filenamer   r   r   read_file_cached$   sV   


r=   Fvocab_bpe_fileencoder_json_filevocab_bpe_hashencoder_json_hashclobber_one_byte_tokensdict[bytes, int]c                   sf  dd t dD }dd |D  d}t dD ]}||vr-|| | td| < |d7 }qt|dks6J t| | }dd |d	dd
 D }	d fdddd t|D }
~t|
}|	D ]\}}||
|| < |d7 }qbdd l}|	t||}fdd|
 D }|dd  |dd  |r|D ]}t|dkr|| |
|< q|
|ksJ |
S )Nc                 S  s(   g | ]}t | rt |d kr|qS ) )chrisprintable.0br   r   r   
<listcomp>b   s   ( z3data_gym_to_mergeable_bpe_ranks.<locals>.<listcomp>   c                 S  s   i | ]}t ||qS r   )rE   rG   r   r   r   
<dictcomp>d   s    z3data_gym_to_mergeable_bpe_ranks.<locals>.<dictcomp>r      c                 S  s   g | ]}t | qS r   )tuplesplit)rH   Z	merge_strr   r   r   rJ   o   s    
valuer   r   r   c                   s   t  fdd| D S )Nc                 3  s    | ]} | V  qd S r   r   rG   data_gym_byte_to_byter   r   	<genexpr>r   s    zKdata_gym_to_mergeable_bpe_ranks.<locals>.decode_data_gym.<locals>.<genexpr>r   )rR   rS   r   r   decode_data_gymq   s   z8data_gym_to_mergeable_bpe_ranks.<locals>.decode_data_gymc                 S  s   i | ]
\}}t |g|qS r   rV   )rH   irI   r   r   r   rL   v   s    c                   s   i | ]	\}} ||qS r   r   )rH   kv)rW   r   r   rL      s    s   <|endoftext|>s   <|startoftext|>)rR   r   r   r   )rangeappendrE   lenr=   decoderO   	enumeratejsonloadsitemspop)r>   r?   r@   rA   rB   Zrank_to_intbytenrI   Zvocab_bpe_contentsZ
bpe_merges	bpe_ranksfirstsecondr`   Zencoder_jsonZencoder_json_loadedrY   r   )rT   rW   r   data_gym_to_mergeable_bpe_ranksZ   s>   

rh   re   tiktoken_bpe_fileNonec              
   C  s   zdd l }W n ty } ztd|d }~ww ||d*}t|  dd dD ]\}}|t|d t|	  d  q)W d    d S 1 sKw   Y  d S )	Nr   r
   r'   c                 S  s   | d S )NrM   r   )xr   r   r   <lambda>   s    z#dump_tiktoken_bpe.<locals>.<lambda>)key       
)
r   r   r   sortedrb   r7   base64	b64encoder   r/   )re   ri   r   r   r   tokenrankr   r   r   dump_tiktoken_bpe   s   &"ru   c                 C  sv   t | |}i }| D ]-}|sqz| \}}t||t|< W q ty8 } ztd|d|  |d }~ww |S )NzError parsing line z in )r=   
splitlinesrO   intrq   	b64decode	Exceptionr3   )ri   r   r<   retliners   rt   r   r   r   r   load_tiktoken_bpe   s   
r|   )r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   r#   r   r   )NNF)r>   r   r?   r   r@   r#   rA   r#   rB   r   r   rC   )re   rC   ri   r   r   rj   )ri   r   r   r#   r   rC   )
__future__r   rq   r   r(   r   r"   r=   rh   ru   r|   r   r   r   r   <module>   s    

9
: