o
    [qi                     @   s4  d dl mZ esdev rddlmZ nd dlZzd dlZW n e	y*   d dlZY nw dd Z
dd	 Zd
d Zdd ZG dd deZG dd deZee G dd deZee G dd deZee G dd deZee dd Zdd ZG dd deZee G dd deZee d d! Zd dl Z d dl!Z!d dl Z d dl"Z"d dl#Z$d d"l%m&Z& d d#l%m'Z' d$d% Z(d&d' Z)ej*Z+ej*Z,e-ed(ej. e-ed(ej. ej/e_0ej1e_2d)D ]Z3e)ee3 qe(e e(e e(e eZ4eZ5dd*l6m7Z7 ee"j89e:e$j;<d+d, G d-d. d.eZ=dS )/    )version_info.   )_sentencepieceNc                 C   sB   z	d| j   }W n tjy   d}Y nw d| jj| jj|f S )Nz	proxy of  z<%s.%s; %s >)this__repr____builtin__	Exception	__class__
__module____name__)selfZstrthis r   ]/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/sentencepiece/__init__.py
_swig_repr   s   r   c                        fdd}|S )Nc                    sj   |dkr | || d S |dkr| j | d S t| |r/ttt| |tr/ | || d S td|  )Nr   thisownz(You cannot add instance attributes to %s)r   ownhasattr
isinstancegetattrtypepropertyAttributeErrorr   namevaluesetr   r   set_instance_attr   s   zE_swig_setattr_nondynamic_instance_variable.<locals>.set_instance_attrr   )r   r    r   r   r   *_swig_setattr_nondynamic_instance_variable   s   	r!   c                    r   )Nc                    s6   t | |rtt| |ts | || d S td|  )Nz%You cannot add class attributes to %s)r   r   r   r   r   )clsr   r   r   r   r   set_class_attr)   s   z?_swig_setattr_nondynamic_class_variable.<locals>.set_class_attrr   )r   r#   r   r   r   '_swig_setattr_nondynamic_class_variable(   s   r$   c                    r   )zlClass decorator for adding a metaclass to a SWIG wrapped class - a slimmed down version of six.add_metaclassc                    s    | j | j| j S N)r   	__bases____dict__copy)r"   	metaclassr   r   wrapper3      z$_swig_add_metaclass.<locals>.wrapperr   )r*   r+   r   r)   r   _swig_add_metaclass1   s   r-   c                   @   s   e Zd ZdZeejZdS )_SwigNonDynamicMetazKMeta class to enforce nondynamic attributes (no new attributes) for a classN)r   r   __qualname____doc__r$   r   __setattr__r   r   r   r   r.   8   s    r.   c                   @   s   e Zd Zedd dd ddZeZdd Zej	Z
dd	 Zd
d Zdd Zdd Zdd Zdd Zdd ZeeZeeZeeZeeZeeZeeZeeZdd Zdd Zdd ZeZdS )1ImmutableSentencePieceText_ImmutableSentencePiecec                 C   
   | j  S r%   r   r   xr   r   r   <lambda>>      
 z:ImmutableSentencePieceText_ImmutableSentencePiece.<lambda>c                 C      | j |S r%   r4   r6   vr   r   r   r7   >       The membership flagdocc                 C      t | t   d S r%   )r   Z:ImmutableSentencePieceText_ImmutableSentencePiece_swiginitZ5new_ImmutableSentencePieceText_ImmutableSentencePiecer   r   r   r   __init__A      z:ImmutableSentencePieceText_ImmutableSentencePiece.__init__c                 C   
   t | S r%   )r   Z8ImmutableSentencePieceText_ImmutableSentencePiece__piecerA   r   r   r   _pieceE      
z8ImmutableSentencePieceText_ImmutableSentencePiece._piecec                 C   rD   r%   )r   Z:ImmutableSentencePieceText_ImmutableSentencePiece__surfacerA   r   r   r   _surfaceH   rF   z:ImmutableSentencePieceText_ImmutableSentencePiece._surfacec                 C   rD   r%   )r   Z5ImmutableSentencePieceText_ImmutableSentencePiece__idrA   r   r   r   _idK   rF   z5ImmutableSentencePieceText_ImmutableSentencePiece._idc                 C   rD   r%   )r   Z8ImmutableSentencePieceText_ImmutableSentencePiece__beginrA   r   r   r   _beginN   rF   z8ImmutableSentencePieceText_ImmutableSentencePiece._beginc                 C   rD   r%   )r   Z6ImmutableSentencePieceText_ImmutableSentencePiece__endrA   r   r   r   _endQ   rF   z6ImmutableSentencePieceText_ImmutableSentencePiece._endc                 C   rD   r%   )r   ZCImmutableSentencePieceText_ImmutableSentencePiece__surface_as_bytesrA   r   r   r   _surface_as_bytesT   rF   zCImmutableSentencePieceText_ImmutableSentencePiece._surface_as_bytesc                 C   rD   r%   )r   ZAImmutableSentencePieceText_ImmutableSentencePiece__piece_as_bytesrA   r   r   r   _piece_as_bytesW   rF   zAImmutableSentencePieceText_ImmutableSentencePiece._piece_as_bytesc                 C   s   d | j| j| j| j| jS )Nz3piece: "{}"
id: {}
surface: "{}"
begin: {}
end: {}
)formatpieceidsurfacebeginendrA   r   r   r   __str__b   s
   z9ImmutableSentencePieceText_ImmutableSentencePiece.__str__c                 C   s<   | j |j ko| j|jko| j|jko| j|jko| j|jkS r%   )rN   rO   rP   rQ   rR   r   otherr   r   r   __eq__j   s   <z8ImmutableSentencePieceText_ImmutableSentencePiece.__eq__c                 C   s   t t| S r%   )hashstrrA   r   r   r   __hash__m      z:ImmutableSentencePieceText_ImmutableSentencePiece.__hash__N)r   r   r/   r   r   r   r   rB   r   Z8delete_ImmutableSentencePieceText_ImmutableSentencePiece__swig_destroy__rE   rG   rH   rI   rJ   rK   rL   rN   Zpiece_as_bytesrP   Zsurface_as_bytesrO   rQ   rR   rS   rV   rY   r   r   r   r   r2   =   s.    r2   c                   @   s   e Zd Zedd dd ddZeZdd Zej	Z
dd	 Zd
d Zdd Zdd Zdd Zdd ZeeZeeZeeZG dd dZedd Zdd Zdd Zdd ZeZdS )ImmutableSentencePieceTextc                 C   r3   r%   r4   r5   r   r   r   r7   v   r8   z#ImmutableSentencePieceText.<lambda>c                 C   r9   r%   r4   r:   r   r   r   r7   v   r<   r=   r>   c                 C   r@   r%   )r   Z#ImmutableSentencePieceText_swiginitZnew_ImmutableSentencePieceTextrA   r   r   r   rB   y   rC   z#ImmutableSentencePieceText.__init__c                 C   rD   r%   )r   Z'ImmutableSentencePieceText__pieces_sizerA   r   r   r   _pieces_size}   rF   z'ImmutableSentencePieceText._pieces_sizec                 C      t | |S r%   )r   Z"ImmutableSentencePieceText__piecesr   indexr   r   r   _pieces   rZ   z"ImmutableSentencePieceText._piecesc                 C   rD   r%   )r   Z ImmutableSentencePieceText__textrA   r   r   r   _text   rF   z ImmutableSentencePieceText._textc                 C   rD   r%   )r   Z!ImmutableSentencePieceText__scorerA   r   r   r   _score   rF   z!ImmutableSentencePieceText._scorec                 C   rD   r%   )r   Z,ImmutableSentencePieceText_SerializeAsStringrA   r   r   r   SerializeAsString   rF   z,ImmutableSentencePieceText.SerializeAsStringc                 C   rD   r%   )r   Z)ImmutableSentencePieceText__text_as_bytesrA   r   r   r   _text_as_bytes   rF   z)ImmutableSentencePieceText._text_as_bytesc                   @   0   e Zd Zdd Zdd Zdd Zdd ZeZd	S )
z9ImmutableSentencePieceText.ImmutableSentencePieceIteratorc                 C      || _ | j  | _d S r%   )protor]   lenr   rh   r   r   r   rB         zBImmutableSentencePieceText.ImmutableSentencePieceIterator.__init__c                 C      | j S r%   ri   rA   r   r   r   __len__      zAImmutableSentencePieceText.ImmutableSentencePieceIterator.__len__c                    j   t |tr fddt jD |j|j|j S |dk r"| j }|dk s+| jkr/td j	|S )Nc                       g | ]} j |qS r   )rh   ra   .0irA   r   r   
<listcomp>       zYImmutableSentencePieceText.ImmutableSentencePieceIterator.__getitem__.<locals>.<listcomp>r   zpiece index is out of range)
r   slicerangeri   startstopstep
IndexErrorrh   ra   r_   r   rA   r   __getitem__      
(
zEImmutableSentencePieceText.ImmutableSentencePieceIterator.__getitem__c                 C      d dd | D S )N
c                 S      g | ]	}d  t|qS zpieces {{
{}}}rM   rX   rs   r6   r   r   r   ru          zUImmutableSentencePieceText.ImmutableSentencePieceIterator.__str__.<locals>.<listcomp>joinrA   r   r   r   rS      rC   zAImmutableSentencePieceText.ImmutableSentencePieceIterator.__str__Nr   r   r/   rB   rn   r}   rS   r   r   r   r   r   ImmutableSentencePieceIterator       	r   c                 C   rD   r%   )r\   r   rA   r   r   r   pieces      
z!ImmutableSentencePieceText.piecesc                 C      |   |  kS r%   rd   rT   r   r   r   rV         z!ImmutableSentencePieceText.__eq__c                 C      t |  S r%   rW   rd   rA   r   r   r   rY      rZ   z#ImmutableSentencePieceText.__hash__c                 C   s$   d | j| jddd | jD S )Nztext: "{}"
score: {}
{}r   c                 S   r   r   r   r   r   r   r   ru      r   z6ImmutableSentencePieceText.__str__.<locals>.<listcomp>)rM   textscorer   r   rA   r   r   r   rS      s
   
z"ImmutableSentencePieceText.__str__N)r   r   r/   r   r   r   r   rB   r   Z!delete_ImmutableSentencePieceTextr[   r]   ra   rb   rc   rd   re   r   Ztext_as_bytesr   r   r   rV   rY   rS   r   r   r   r   r\   u   s*    
r\   c                   @   s   e Zd Zedd dd ddZeZdd Zej	Z
dd	 Zd
d Zdd ZG dd dZedd Zdd Zdd Zdd ZeZdS )ImmutableNBestSentencePieceTextc                 C   r3   r%   r4   r5   r   r   r   r7      r8   z(ImmutableNBestSentencePieceText.<lambda>c                 C   r9   r%   r4   r:   r   r   r   r7      r<   r=   r>   c                 C   r@   r%   )r   Z(ImmutableNBestSentencePieceText_swiginitZ#new_ImmutableNBestSentencePieceTextrA   r   r   r   rB      rC   z(ImmutableNBestSentencePieceText.__init__c                 C   rD   r%   )r   Z,ImmutableNBestSentencePieceText__nbests_sizerA   r   r   r   _nbests_size   rF   z,ImmutableNBestSentencePieceText._nbests_sizec                 C   r^   r%   )r   Z'ImmutableNBestSentencePieceText__nbestsr_   r   r   r   _nbests   rZ   z'ImmutableNBestSentencePieceText._nbestsc                 C   rD   r%   )r   Z1ImmutableNBestSentencePieceText_SerializeAsStringrA   r   r   r   rd      rF   z1ImmutableNBestSentencePieceText.SerializeAsStringc                   @   rf   )
zBImmutableNBestSentencePieceText.ImmutableSentencePieceTextIteratorc                 C   rg   r%   )rh   r   ri   rj   r   r   r   rB      rk   zKImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__init__c                 C   rl   r%   rm   rA   r   r   r   rn      ro   zJImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__len__c                    rp   )Nc                    rq   r   )rh   r   rr   rA   r   r   ru      rv   zbImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__getitem__.<locals>.<listcomp>r   znbests index is out of range)
r   rw   rx   ri   ry   rz   r{   r|   rh   r   r_   r   rA   r   r}      r~   zNImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__getitem__c                 C   r   )Nr   c                 S   r   znbests {{
{}}}r   r   r   r   r   ru      r   z^ImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__str__.<locals>.<listcomp>r   rA   r   r   r   rS      rC   zJImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator.__str__Nr   r   r   r   r   "ImmutableSentencePieceTextIterator   r   r   c                 C   rD   r%   )r   r   rA   r   r   r   nbests   r   z&ImmutableNBestSentencePieceText.nbestsc                 C   r   r%   r   rT   r   r   r   rV      r   z&ImmutableNBestSentencePieceText.__eq__c                 C   r   r%   r   rA   r   r   r   rY      rZ   z(ImmutableNBestSentencePieceText.__hash__c                 C   s   d dd | jD S )Nr   c                 S   r   r   r   r   r   r   r   ru      r   z;ImmutableNBestSentencePieceText.__str__.<locals>.<listcomp>)r   r   rA   r   r   r   rS      r,   z'ImmutableNBestSentencePieceText.__str__N)r   r   r/   r   r   r   r   rB   r   Z&delete_ImmutableNBestSentencePieceTextr[   r   r   rd   r   r   rV   rY   rS   r   r   r   r   r      s    
r   c                   @   s  e Zd Zedd dd ddZeZdd Zej	Z
dd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Z d4d5 Z!d6d7 Z"d8d9 Z#d:d; Z$d<d= Z%d>d? Z&d@dA Z'dBdC Z(dDdE Z)dFdG Z*dHdI Z+dJdK Z,dLdM Z-dNdO Z.dPdQ Z/dRdS Z0dTdU Z1dVdW Z2dXdY Z3dZd[ Z4d\d] Z5d^d_ Z6d`da Z7dbdc Z8ddde Z9dfdg Z:dhdi Z;djdk Z<dldm Z=dndo Z>dpdq Z?drds Z@dtdu ZAdvdw ZBdxdxeCdydydydydydzd{dzfd|d}ZD	x	x	x	x	x	x	x	x	xdd~dZEdd ZFdd ZGdd ZHdd ZIdddZJdddZKdddZLdddZM	x	x	x	x	x	xdddZNdddZOdddZPdddZQdddZR	x	x	x	x	x	x	x	x	xdddZSdddZTdddZUdddZVdddZWeXdxfddZYeXfddZZeXfddZ[dddZ\dddZ]dddZ^dddZ_dddZdddZ`dd Zadd Zbdd Zcdd Zddd Zedd ZfddĄ ZgdddƄZhdxS )SentencePieceProcessorc                 C   r3   r%   r4   r5   r   r   r   r7      r8   zSentencePieceProcessor.<lambda>c                 C   r9   r%   r4   r:   r   r   r   r7      r<   r=   r>   c                 C   r@   r%   )r   ZSentencePieceProcessor_swiginitZnew_SentencePieceProcessorrA   r   r   r   rB      rC   zSentencePieceProcessor.__init__c                 C   r^   r%   )r   Z.SentencePieceProcessor_LoadFromSerializedProtor   Z
serializedr   r   r   LoadFromSerializedProto   rZ   z.SentencePieceProcessor.LoadFromSerializedProtoc                 C   r^   r%   )r   Z,SentencePieceProcessor_SetEncodeExtraOptionsr   Zextra_optionr   r   r   SetEncodeExtraOptions  rZ   z,SentencePieceProcessor.SetEncodeExtraOptionsc                 C   r^   r%   )r   Z,SentencePieceProcessor_SetDecodeExtraOptionsr   r   r   r   SetDecodeExtraOptions  rZ   z,SentencePieceProcessor.SetDecodeExtraOptionsc                 C   r^   r%   )r   Z$SentencePieceProcessor_SetVocabulary)r   Zvalid_vocabr   r   r   SetVocabulary  rZ   z$SentencePieceProcessor.SetVocabularyc                 C   rD   r%   )r   Z&SentencePieceProcessor_ResetVocabularyrA   r   r   r   ResetVocabulary  rF   z&SentencePieceProcessor.ResetVocabularyc                 C      t | ||S r%   )r   Z%SentencePieceProcessor_LoadVocabulary)r   filename	thresholdr   r   r   LoadVocabulary     z%SentencePieceProcessor.LoadVocabularyc                 G   s   t j| g|R  S r%   )r   Z'SentencePieceProcessor_CalculateEntropyr   argsr   r   r   CalculateEntropy  s   z'SentencePieceProcessor.CalculateEntropyc                 C   rD   r%   )r   Z#SentencePieceProcessor_GetPieceSizerA   r   r   r   GetPieceSize  rF   z#SentencePieceProcessor.GetPieceSizec                 C   r^   r%   )r   Z SentencePieceProcessor_PieceToIdr   rN   r   r   r   	PieceToId  rZ   z SentencePieceProcessor.PieceToIdc                 C   r^   r%   )r   Z SentencePieceProcessor_IdToPiecer   rO   r   r   r   	IdToPiece  rZ   z SentencePieceProcessor.IdToPiecec                 C   r^   r%   )r   ZSentencePieceProcessor_GetScorer   r   r   r   GetScore  rZ   zSentencePieceProcessor.GetScorec                 C   r^   r%   )r   Z SentencePieceProcessor_IsUnknownr   r   r   r   	IsUnknown   rZ   z SentencePieceProcessor.IsUnknownc                 C   r^   r%   )r   Z SentencePieceProcessor_IsControlr   r   r   r   	IsControl#  rZ   z SentencePieceProcessor.IsControlc                 C   r^   r%   )r   ZSentencePieceProcessor_IsUnusedr   r   r   r   IsUnused&  rZ   zSentencePieceProcessor.IsUnusedc                 C   r^   r%   )r   ZSentencePieceProcessor_IsByter   r   r   r   IsByte)  rZ   zSentencePieceProcessor.IsBytec                 C   rD   r%   )r   ZSentencePieceProcessor_unk_idrA   r   r   r   unk_id,  rF   zSentencePieceProcessor.unk_idc                 C   rD   r%   )r   ZSentencePieceProcessor_bos_idrA   r   r   r   bos_id/  rF   zSentencePieceProcessor.bos_idc                 C   rD   r%   )r   ZSentencePieceProcessor_eos_idrA   r   r   r   eos_id2  rF   zSentencePieceProcessor.eos_idc                 C   rD   r%   )r   ZSentencePieceProcessor_pad_idrA   r   r   r   pad_id5  rF   zSentencePieceProcessor.pad_idc                 C   rD   r%   )r   Z-SentencePieceProcessor_serialized_model_protorA   r   r   r   serialized_model_proto8  rF   z-SentencePieceProcessor.serialized_model_protoc                 C   r^   r%   )r   Z#SentencePieceProcessor_LoadFromFiler   argr   r   r   LoadFromFile;  rZ   z#SentencePieceProcessor.LoadFromFilec	           	      C      t | ||||||||	S r%   )r   Z#SentencePieceProcessor__EncodeAsIds	r   r   enable_sampling
nbest_sizealphaadd_bosadd_eosreverseemit_unk_piecer   r   r   _EncodeAsIds>     z#SentencePieceProcessor._EncodeAsIdsc	           	      C   r   r%   )r   Z&SentencePieceProcessor__EncodeAsPiecesr   r   r   r   _EncodeAsPiecesA  r   z&SentencePieceProcessor._EncodeAsPiecesc	           	      C   r   r%   )r   Z/SentencePieceProcessor__EncodeAsSerializedProtor   r   r   r   _EncodeAsSerializedProtoD  r   z/SentencePieceProcessor._EncodeAsSerializedProtoc	           	      C   r   r%   )r   Z.SentencePieceProcessor__EncodeAsImmutableProtor   r   r   r   _EncodeAsImmutableProtoG  r   z.SentencePieceProcessor._EncodeAsImmutableProtoc
           
      C      t | |||||||||	
S r%   )r   Z(SentencePieceProcessor__EncodeAsIdsBatch
r   insnum_threadsr   r   r   r   r   r   r   r   r   r   _EncodeAsIdsBatchJ     z(SentencePieceProcessor._EncodeAsIdsBatchc
           
      C   r   r%   )r   Z+SentencePieceProcessor__EncodeAsPiecesBatchr   r   r   r   _EncodeAsPiecesBatchM  r   z+SentencePieceProcessor._EncodeAsPiecesBatchc
           
      C   r   r%   )r   Z4SentencePieceProcessor__EncodeAsSerializedProtoBatchr   r   r   r   _EncodeAsSerializedProtoBatchP  r   z4SentencePieceProcessor._EncodeAsSerializedProtoBatchc
           
      C   r   r%   )r   Z3SentencePieceProcessor__EncodeAsImmutableProtoBatchr   r   r   r   _EncodeAsImmutableProtoBatchS  r   z3SentencePieceProcessor._EncodeAsImmutableProtoBatchc                 C   r^   r%   )r   Z!SentencePieceProcessor__DecodeIdsr   idsr   r   r   
_DecodeIdsV  rZ   z!SentencePieceProcessor._DecodeIdsc                 C   r^   r%   )r   Z(SentencePieceProcessor__DecodeIdsAsBytesr   r   r   r   _DecodeIdsAsBytesY  rZ   z(SentencePieceProcessor._DecodeIdsAsBytesc                 C   r^   r%   )r   Z$SentencePieceProcessor__DecodePiecesr   r   r   r   r   _DecodePieces\  rZ   z$SentencePieceProcessor._DecodePiecesc                 C   r^   r%   )r   Z2SentencePieceProcessor__DecodeIdsAsSerializedProtor   r   r   r   _DecodeIdsAsSerializedProto_  rZ   z2SentencePieceProcessor._DecodeIdsAsSerializedProtoc                 C   r^   r%   )r   Z5SentencePieceProcessor__DecodePiecesAsSerializedProtor   r   r   r   _DecodePiecesAsSerializedProtob  rZ   z5SentencePieceProcessor._DecodePiecesAsSerializedProtoc                 C   r^   r%   )r   Z1SentencePieceProcessor__DecodeIdsAsImmutableProtor   r   r   r   _DecodeIdsAsImmutableProtoe  rZ   z1SentencePieceProcessor._DecodeIdsAsImmutableProtoc                 C   r^   r%   )r   Z4SentencePieceProcessor__DecodePiecesAsImmutableProtor   r   r   r   _DecodePiecesAsImmutableProtoh  rZ   z4SentencePieceProcessor._DecodePiecesAsImmutableProtoc                 C   r   r%   )r   Z&SentencePieceProcessor__DecodeIdsBatchr   r   r   r   r   r   _DecodeIdsBatchk  r   z&SentencePieceProcessor._DecodeIdsBatchc                 C   r   r%   )r   Z-SentencePieceProcessor__DecodeIdsAsBytesBatchr   r   r   r   _DecodeIdsAsBytesBatchn  r   z-SentencePieceProcessor._DecodeIdsAsBytesBatchc                 C   r   r%   )r   Z7SentencePieceProcessor__DecodeIdsAsSerializedProtoBatchr   r   r   r    _DecodeIdsAsSerializedProtoBatchq  r   z7SentencePieceProcessor._DecodeIdsAsSerializedProtoBatchc                 C   r   r%   )r   Z6SentencePieceProcessor__DecodeIdsAsImmutableProtoBatchr   r   r   r   _DecodeIdsAsImmutableProtoBatcht  r   z6SentencePieceProcessor._DecodeIdsAsImmutableProtoBatchc                 C   r   r%   )r   Z)SentencePieceProcessor__DecodePiecesBatchr   r   r   r   _DecodePiecesBatchw  r   z)SentencePieceProcessor._DecodePiecesBatchc                 C   r   r%   )r   Z:SentencePieceProcessor__DecodePiecesAsSerializedProtoBatchr   r   r   r   #_DecodePiecesAsSerializedProtoBatchz  r   z:SentencePieceProcessor._DecodePiecesAsSerializedProtoBatchc                 C   r   r%   )r   Z9SentencePieceProcessor__DecodePiecesAsImmutableProtoBatchr   r   r   r   "_DecodePiecesAsImmutableProtoBatch}  r   z9SentencePieceProcessor._DecodePiecesAsImmutableProtoBatchc              	   C      t | ||||||S r%   )r   Z(SentencePieceProcessor__NBestEncodeAsIdsr   r   r   r   r   r   r   r   r   r   _NBestEncodeAsIds  r,   z(SentencePieceProcessor._NBestEncodeAsIdsc              	   C   r   r%   )r   Z+SentencePieceProcessor__NBestEncodeAsPiecesr   r   r   r   _NBestEncodeAsPieces  r,   z+SentencePieceProcessor._NBestEncodeAsPiecesc              	   C   r   r%   )r   Z4SentencePieceProcessor__NBestEncodeAsSerializedProtor   r   r   r   _NBestEncodeAsSerializedProto  r,   z4SentencePieceProcessor._NBestEncodeAsSerializedProtoc              	   C   r   r%   )r   Z3SentencePieceProcessor__NBestEncodeAsImmutableProtor   r   r   r   _NBestEncodeAsImmutableProto  r,   z3SentencePieceProcessor._NBestEncodeAsImmutableProtoc
           
      C   r   r%   )r   Z1SentencePieceProcessor__SampleEncodeAndScoreAsIds
r   r   num_samplesr   worinclude_bestr   r   r   r   r   r   r   _SampleEncodeAndScoreAsIds  r   z1SentencePieceProcessor._SampleEncodeAndScoreAsIdsc
           
      C   r   r%   )r   Z4SentencePieceProcessor__SampleEncodeAndScoreAsPiecesr   r   r   r   _SampleEncodeAndScoreAsPieces  r   z4SentencePieceProcessor._SampleEncodeAndScoreAsPiecesc
           
      C   r   r%   )r   Z=SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProtor   r   r   r   &_SampleEncodeAndScoreAsSerializedProto  r   z=SentencePieceProcessor._SampleEncodeAndScoreAsSerializedProtoc
           
      C   r   r%   )r   Z<SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProtor   r   r   r   %_SampleEncodeAndScoreAsImmutableProto  r   z<SentencePieceProcessor._SampleEncodeAndScoreAsImmutableProtoc                 C   r^   r%   )r   Z!SentencePieceProcessor__Normalizer   r   r   r   r   
_Normalize  rZ   z!SentencePieceProcessor._Normalizec                 C   r^   r%   )r   Z,SentencePieceProcessor__NormalizeWithOffsetsr   r   r   r   _NormalizeWithOffsets  rZ   z,SentencePieceProcessor._NormalizeWithOffsetsc                 C   r   r%   )r   Z(SentencePieceProcessor__CalculateEntropy)r   r   r   r   r   r   _CalculateEntropy  r   z(SentencePieceProcessor._CalculateEntropyc                 C   s   t | |||S r%   )r   Z-SentencePieceProcessor__CalculateEntropyBatch)r   r   r   r   r   r   r   _CalculateEntropyBatch  r   z-SentencePieceProcessor._CalculateEntropyBatchc                 C   r^   r%   )r   Z.SentencePieceProcessor__OverrideNormalizerSpecr   r   r   r   _OverrideNormalizerSpec  rZ   z.SentencePieceProcessor._OverrideNormalizerSpecNFg?c                 C   s\   t |  || _|| _|| _|| _|| _|| _|	| _|
| _|| _	|s#|r,| j
||d dS dS )a  Initialzie sentencepieceProcessor.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after
          reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout.
                    nbest_size = {0,1}: No sampling is performed.
                    nbest_size > 1: samples from the nbest_size results.
                    nbest_size < 0: assuming that nbest_size is infinite and samples
                      from the all hypothesis (lattice) using
                      forward-filtering-and-backward-sampling algorithm.
        alpha: Soothing parameter for unigram sampling, and dropout probability of
               merge operations for BPE-dropout.
        num_threads: number of threads in batch processing (Default = -1, auto-detected)
      )
model_filemodel_protoN)$_sentencepiece_processor_init_native	_out_type_add_bos_add_eos_reverse_emit_unk_piece_enable_sampling_nbest_size_alpha_num_threadsLoad)r   r   r   out_typer   r   r   r   r   r   r   r   r   r   r   Init  s   "zSentencePieceProcessor.Initc                 C   s  |du r| j }|du r| j}|du r| j}|du r| j}|du r#| j}|du r*| j}|du r1| j}|	du r8| j}	|
du r?| j}
|dkrW|du sS|dksS|dksS|	du rWt	d|
du sat
|
turet	dt
|tu r|tu r|| ||
|||	||||	S |tu r| ||
|||	||||	S |dks|dkr| ||
|||	||||	S |d	kr| ||
|||	||||	S |tu r| ||||	||||S |tu r| ||||	||||S |dks|dkr| ||||	||||S |d	kr| ||||	||||S t	d
|)a~  Encode text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after
                 reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout.
                    nbest_size = {0,1}: No sampling is performed.
                    nbest_size > 1: samples from the nbest_size results.
                    nbest_size < 0: assuming that nbest_size is infinite and samples
                    from the all hypothesis (lattice) using
                    forward-filtering-and-backward-sampling algorithm.
        alpha: Soothing parameter for unigram sampling, and merge probability for
               BPE-dropout (probablity 'p' in BPE-dropout paper).
        num_threads: the number of threads used in the batch processing (Default = -1).
      NTr   r   a  When enable_sampling is True, We must specify "nbest_size > 1" or "nbest_size = -1", and "alpha". "nbest_size" is enabled only on unigram mode ignored in BPE-dropout. when "nbest_size = -1" , this method samples from all candidates on the lattice instead of nbest segmentations.num_threads must be intserialized_protorh   immutable_protozunknown out_type={})r   r   r   r   r  r  r  r  r  RuntimeErrorr   intlistr   rX   r   r   r   r   r   r   r   rM   )r   inputr  r   r   r   r   r   r   r   r   r   r   r   Encode  sv    











zSentencePieceProcessor.Encodec                 K      | j d|td|S Nr  r  r   r  rX   r   r  kwargsr   r   r   EncodeAsPieces5  r,   z%SentencePieceProcessor.EncodeAsPiecesc                 K   r  r  r  r  r  r   r   r   EncodeAsIds9  r,   z"SentencePieceProcessor.EncodeAsIdsc                 K      | j d|dd|S )Nr
  r  r   r  r  r   r   r   EncodeAsSerializedProto=  r,   z.SentencePieceProcessor.EncodeAsSerializedProtoc                 K   r  )Nr  r  r   r  r  r   r   r   EncodeAsImmutableProtoA  r,   z-SentencePieceProcessor.EncodeAsImmutableProtoc                 K      | j d|||tdd|S NTr  r   r   r  r   r   r  r   r  r   r   r  r   r   r   SampleEncodeAsPiecesE  
   z+SentencePieceProcessor.SampleEncodeAsPiecesc                 K   r  r  r  r!  r   r   r   SampleEncodeAsIdsJ  r#  z(SentencePieceProcessor.SampleEncodeAsIdsc                 K      | j d|||ddd|S )Nr
  Tr   r   r  r!  r   r   r   SampleEncodeAsSerializedProtoO  r#  z4SentencePieceProcessor.SampleEncodeAsSerializedProtoc                 K   r%  )Nr  Tr   r   r  r!  r   r   r   SampleEncodeAsImmutableProtoT  r#  z3SentencePieceProcessor.SampleEncodeAsImmutableProtoc                    s   du rj du rjdu rjdu rjdu r#jdu r*jdkr0dfdd t|tu rK fdd|D S  |S )a  NBestEncode text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str.
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        nbest_size: nbest size
      Nr   r   c                    s   t u r|  S tu r|  S dks$dkr.|  S dkr<|  S td)Nr
  rh   r  zunknown out_type)r  r   rX   r   r   r   r  r   )r   r   r   r   r  r   r   r   r   _encode}  s"   z3SentencePieceProcessor.NBestEncode.<locals>._encodec                       g | ]} |qS r   r   rs   nr)  r   r   ru         z6SentencePieceProcessor.NBestEncode.<locals>.<listcomp>)r   r   r   r   r  r  r   r  )r   r  r  r   r   r   r   r   r   )r)  r   r   r   r   r  r   r   r   NBestEncodeY  s$   z"SentencePieceProcessor.NBestEncodec                 K      | j d||td|S Nr  r   r  r   )r/  rX   r   r  r   r  r   r   r   NBestEncodeAsPieces  
   
z*SentencePieceProcessor.NBestEncodeAsPiecesc                 K   r0  r1  )r/  r  r3  r   r   r   NBestEncodeAsIds  r5  z'SentencePieceProcessor.NBestEncodeAsIdsc                 K      | j d||dd|S )Nr
  r2  r   r/  r3  r   r   r   NBestEncodeAsSerializedProto  r5  z3SentencePieceProcessor.NBestEncodeAsSerializedProtoc                 K   r7  )Nr  r2  r   r8  r3  r   r   r   NBestEncodeAsImmutableProto  r5  z2SentencePieceProcessor.NBestEncodeAsImmutableProtoc              
      s   du r	j du r	jdu r	jdu r	jdu r#	jdu r)ddu r/d
du r5d
du r;ddkrCtdrK
sKtd	
f
dd	 t|tu ri fd
d|D S  |S )a   SampleEncodeAndScore text input to segmented ids or tokens.

        Args:
        input: input string. accepsts list of string.
        out_type: output type. int or str or 'serialized_proto' or 'immutable_proto'
        add_bos: Add <s> to the result (Default = false)
        add_eos: Add </s> to the result (Default = false) <s>/</s> is added after reversing (if enabled).
        reverse: Reverses the tokenized sequence (Default = false)
        emit_unk_piece: Emits the unk literal string (Default = false)
        num_samples: How many samples to return (Default = 1)
        alpha: inverse temperature for sampling
        wor: whether to sample without replacement (Default = false)
        include_best: whether to include the best tokenization, requires wor=True (Default = false)
      Nr   g      ?Fr   znum_examples must be positivez8When include_best is True, We must specify "wor = True".c                    s   t u r| 	 	S tu r"| 	 	S dks*dkr7| 	 	S dkrH| 	 	S td)Nr
  rh   r  zunknown output type)r  r   rX   r   r   r   r  r(  )
r   r   r   r   r   r   r  r   r   r   r   r   r)    s"   z<SentencePieceProcessor.SampleEncodeAndScore.<locals>._encodec                    r*  r   r   r+  r-  r   r   ru     r.  z?SentencePieceProcessor.SampleEncodeAndScore.<locals>.<listcomp>)r   r   r   r   r  r  r   r  )r   r  r  r   r   r   r   r   r   r   r   r   )r)  r   r   r   r   r   r   r  r   r   r   r   SampleEncodeAndScore  s4   z+SentencePieceProcessor.SampleEncodeAndScorec                 K      | j d|||td|S Nr  r   r   r  r   )r;  rX   r   r  r   r   r  r   r   r   SampleEncodeAndScoreAsPieces  
   z3SentencePieceProcessor.SampleEncodeAndScoreAsPiecesc                 K   r<  r=  )r;  r  r?  r   r   r   SampleEncodeAndScoreAsIds  rA  z0SentencePieceProcessor.SampleEncodeAndScoreAsIdsc                 K      | j d|||dd|S )Nr
  r>  r   r;  r?  r   r   r   %SampleEncodeAndScoreAsSerializedProto  rA  z<SentencePieceProcessor.SampleEncodeAndScoreAsSerializedProtoc                 K   rC  )Nr  r>  r   rD  r?  r   r   r   $SampleEncodeAndScoreAsImmutableProto  rA  z;SentencePieceProcessor.SampleEncodeAndScoreAsImmutableProtoc                 C   s  |du r| j }|du st|turtd|sdS |tu rt|tu r)| |gS t|tu r5| |gS t|tu rt|dksIt|d tu rN| |S t|d tu r[| |S t|d tu rt|d dksut|d d tu r{| 	||S t|d d tu r| 
||S |tu rt|tu r| |gS t|tu r| |gS t|tu rt|dkst|d tu r| |S t|d tu r| |S t|d tu rt|d dkst|d d tu r| ||S t|d d tu r| 
||S |dkrzt|tu r| |gS t|tu r| |gS t|tu rzt|dks3t|d tu r8| |S t|d tu rF| |S t|d tu rzt|d dksct|d d tu ri| ||S t|d d tu rz| ||S |dkrt|tu r| |gS t|tu r| |gS t|tu rt|dkst|d tu r| |S t|d tu r| |S t|d tu rt|d dkst|d d tu r| ||S t|d d tu r| ||S td)zDecode processed id or token sequences.

      Args:
        out_type: output type. str, bytes or 'serialized_proto' or 'immutable_proto' (Default = str)
        num_threads: the number of threads used in the batch processing (Default = -1).
      Nr	  r   r   r
  r  zunknown output or input type)r  r   r  r  rX   r   r   r  ri   r   r   bytesr   r   r   r   r   r   r   r   r   r   )r   r  r  r   r   r   r   Decode  s   

$

$
 

(
 

(zSentencePieceProcessor.Decodec                 K      | j d||d|S r  rH  r   r  r  r  r   r   r   DecodePiecesg  r,   z#SentencePieceProcessor.DecodePiecesc                 K   rI  r  rJ  rK  r   r   r   	DecodeIdsk  r,   z SentencePieceProcessor.DecodeIdsr
  c                 K   rI  r  rJ  rK  r   r   r   DecodePiecesAsSerializedProtoo  r,   z4SentencePieceProcessor.DecodePiecesAsSerializedProtoc                 K   rI  r  rJ  rK  r   r   r   DecodeIdsAsSerializedProtos  r,   z1SentencePieceProcessor.DecodeIdsAsSerializedProtor  c                 K   rI  r  rJ  rK  r   r   r   DecodePiecesAsImmutableProtow  r,   z3SentencePieceProcessor.DecodePiecesAsImmutableProtoc                 K   rI  r  rJ  rK  r   r   r   DecodeIdsAsImmutableProto{  r,   z0SentencePieceProcessor.DecodeIdsAsImmutableProtoc                 C   sP   t |tu r"|du r| j}|du st |turtd| |||S | ||S )zCalculate sentence entropyNr	  )r   r  r  r  r  r   r   )r   r  r   r   r   r   r   r     s   c                    4   fdd t |tu r fdd|D S  |S )Nc                       r  | S  | S r%   r   r   r(  r   with_offsetsr   r   
_normalize     

z4SentencePieceProcessor.Normalize.<locals>._normalizec                    r*  r   r   r   rW  r   r   ru     r.  z4SentencePieceProcessor.Normalize.<locals>.<listcomp>r   r  r   r  rV  r   rW  r   rV  r   	Normalize     z SentencePieceProcessor.Normalizec                 K   s,   i }|  D ]
\}}t|||< q| |S r%   )itemsrX   r   )r   r  
new_kwargskeyr   r   r   r   OverrideNormalizerSpec  s   
z-SentencePieceProcessor.OverrideNormalizerSpecc                 C      |   S r%   r   rA   r   r   r   
piece_size     z!SentencePieceProcessor.piece_sizec                 C   rc  r%   rd  rA   r   r   r   
vocab_size  rf  z!SentencePieceProcessor.vocab_sizec                 C   rc  r%   r   rA   r   r   r   __getstate__  rf  z#SentencePieceProcessor.__getstate__c                 C      |    | | d S r%   rB   r   r   r   r   r   r   __setstate__     z#SentencePieceProcessor.__setstate__c                 C   rc  r%   rd  rA   r   r   r   rn     rf  zSentencePieceProcessor.__len__c                 C   s
   |  |S r%   )r   r   r   r   r   r}     rF   z"SentencePieceProcessor.__getitem__c                 C   s(   |r|rt d|r| |S | |S )a  Overwride SentencePieceProcessor.Load to support both model_file and model_proto.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto. Either `model_file`
          or `model_proto` must be set.
      z-model_file and model_proto must be exclusive.)r  r   r   )r   r   r   r   r   r   r    s
   

zSentencePieceProcessor.Load)	NNNNNNNNNNN)NNNNNNr%   )r
  )r  )ir   r   r/   r   r   r   r   rB   r   Zdelete_SentencePieceProcessorr[   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r"  r$  r&  r'  r/  r4  r6  r9  r:  r;  r@  rB  rE  rF  rX   rH  rL  rM  rN  rO  rP  rQ  r]  rb  re  rg  ri  rm  rn   r}   r  r   r   r   r   r      s
   
2
^





:




M


_






r   c                 C   rD   r%   )r   SetRandomGeneratorSeed)seedr   r   r   rp    rF   rp  c                 C   rD   r%   )r   SetMinLogLevel)r;   r   r   r   rr    rF   rr  c                   @   s   e Zd Zedd dd ddZdd ZeZedd	 Z	ed
d Z
edd Zedd Zedd ZedddZedddZdS )SentencePieceTrainerc                 C   r3   r%   r4   r5   r   r   r   r7     r8   zSentencePieceTrainer.<lambda>c                 C   r9   r%   r4   r:   r   r   r   r7     r<   r=   r>   c                 O   s   t d)NzNo constructor defined)r   )r   r   r  r   r   r   rB     rf  zSentencePieceTrainer.__init__c                 C   rD   r%   )r   Z%SentencePieceTrainer__TrainFromString)r   r   r   r   _TrainFromString  r   z%SentencePieceTrainer._TrainFromStringc                 C   rD   r%   )r   Z"SentencePieceTrainer__TrainFromMapr   r   r   r   _TrainFromMap  r   z"SentencePieceTrainer._TrainFromMapc                 C   r^   r%   )r   Z#SentencePieceTrainer__TrainFromMap2r   iterr   r   r   _TrainFromMap2     z#SentencePieceTrainer._TrainFromMap2c                 C   rD   r%   )r   Z#SentencePieceTrainer__TrainFromMap3ru  r   r   r   _TrainFromMap3  r   z#SentencePieceTrainer._TrainFromMap3c                 C   r^   r%   )r   Z#SentencePieceTrainer__TrainFromMap4rw  r   r   r   _TrainFromMap4  rz  z#SentencePieceTrainer._TrainFromMap4Nc           	      K   s   | durt | tu rt| S dd }d}d}i }| D ]\}}|dv r(|}q|dv r/|}q||||< q|rM|rAt||}nt|}|| dS |rUt||S t	|S )zDTrain Sentencepiece model. Accept both kwargs and legacy string arg.Nc                 S   sZ   t | tu r)tjd dkrt }nt }tj|dd}|dd | D  |	 S t
| S )zEncode value to CSV..r      r   )lineterminatorc                 S   s   g | ]}t |qS r   )rX   )rs   r;   r   r   r   ru     r.  z@SentencePieceTrainer._Train.<locals>._encode.<locals>.<listcomp>)r   r  sysr   StringIOBytesIOcsvwriterwriterowgetvaluerX   )r   fr  r   r   r   r)    s   z,SentencePieceTrainer._Train.<locals>._encode)sentence_iteratorZsentence_reader)model_writer)
r   rX   rs  rt  r_  r|  r{  writery  rv  )	r   r  r)  r  r  r`  ra  r   r   r   r   r   _Train  s.   



zSentencePieceTrainer._Trainc                 K   sD   t |d tjdd| i| W d    d S 1 sw   Y  d S )N)ostreamr   r   )
_LogStreamrs  r  )r   Z	logstreamr  r   r   r   Train  s   "zSentencePieceTrainer.Trainr%   ro  )r   r   r/   r   r   rB   r   r   staticmethodrt  rv  ry  r{  r|  r  r  r   r   r   r   rs    s$    




,rs  c                   @   s   e Zd Zedd dd ddZeZdd Zej	Z
dd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Z							d"ddZd#ddZdd Zd d! ZdS )$SentencePieceNormalizerc                 C   r3   r%   r4   r5   r   r   r   r7     r8   z SentencePieceNormalizer.<lambda>c                 C   r9   r%   r4   r:   r   r   r   r7     r<   r=   r>   c                 C   r@   r%   )r   Z SentencePieceNormalizer_swiginitZnew_SentencePieceNormalizerrA   r   r   r   rB      rC   z SentencePieceNormalizer.__init__c                 C   r^   r%   )r   Z/SentencePieceNormalizer_LoadFromSerializedProtor   r   r   r   r   $  rZ   z/SentencePieceNormalizer.LoadFromSerializedProtoc                 C   r^   r%   )r   Z'SentencePieceNormalizer_LoadFromRuleTSV)r   r   r   r   r   LoadFromRuleTSV'  rZ   z'SentencePieceNormalizer.LoadFromRuleTSVc                 C   r^   r%   )r   Z(SentencePieceNormalizer_LoadFromRuleName)r   r   r   r   r   LoadFromRuleName*  rZ   z(SentencePieceNormalizer.LoadFromRuleNamec                 C   rD   r%   )r   Z.SentencePieceNormalizer_serialized_model_protorA   r   r   r   r   -  rF   z.SentencePieceNormalizer.serialized_model_protoc                 C   r^   r%   )r   Z$SentencePieceNormalizer_LoadFromFiler   r   r   r   r   0  rZ   z$SentencePieceNormalizer.LoadFromFilec                 C   r^   r%   )r   Z"SentencePieceNormalizer__Normalizer   r   r   r   r   3  rZ   z"SentencePieceNormalizer._Normalizec                 C   r^   r%   )r   Z-SentencePieceNormalizer__NormalizeWithOffsetsr   r   r   r   r   6  rZ   z-SentencePieceNormalizer._NormalizeWithOffsetsc                 C   r   r%   )r   Z&SentencePieceNormalizer__SetProtoFieldr   r   r   r   _SetProtoField9  r   z&SentencePieceNormalizer._SetProtoFieldNFc           	      C   s   t |  |r| |}n|r| |}n|r| |}n|r$| |}ntd|r>| d| | d| | d| dS dS )a  Initialzie sentencePieceNormalizer.

      Args:
        model_file: The sentencepiece model file path.
        model_proto: The sentencepiece model serialized proto.
        rule_tsv: The normalization rule file in TSV format.
        rule_name: Pre-defined normalization name.
        add_dummy_prefix: add dummy prefix.
        escape_whitespaces: escape whitespaces.
        remove_extra_whitespaces: remove extra whitespaces.
      zno model is specifiedadd_dummy_prefixescape_whitespacesremove_extra_whitespacesN)%_sentencepiece_normalizer_init_nativer   r   r  r  r  r  )	r   r   r   Zrule_tsvZ	rule_namer  r  r  statusr   r   r   r  <  s   zSentencePieceNormalizer.Initc                    rR  )Nc                    rS  r%   rT  r(  rU  r   r   rW  c  rX  z5SentencePieceNormalizer.Normalize.<locals>._normalizec                    r*  r   r   r   rY  r   r   ru   i  r.  z5SentencePieceNormalizer.Normalize.<locals>.<listcomp>rZ  r[  r   r\  r   r]  b  r^  z!SentencePieceNormalizer.Normalizec                 C   rc  r%   rh  rA   r   r   r   ri  m  rf  z$SentencePieceNormalizer.__getstate__c                 C   rj  r%   rk  rl  r   r   r   rm  q  rn  z$SentencePieceNormalizer.__setstate__)NNNNFFFr%   )r   r   r/   r   r   r   r   rB   r   Zdelete_SentencePieceNormalizerr[   r   r  r  r   r   r   r   r  r  r]  ri  rm  r   r   r   r   r    s0    

&r  c                 C   rD   r%   )r   
SetDataDir)data_dirr   r   r   r  y  rF   r  )r  )r  c                 C   sh   i }| j  D ]\}}td|r"tdd| dd}|||< q| D ]
\}}t| || q'dS )z1Added snake_cased method from CammelCased method.z^[A-Z]+z(?<!^)(?=[A-Z])_Zn_bestZnbestN)r'   r_  rematchsublowerreplacesetattr)	classnameZ	snake_mapkr;   snaker   r   r   _add_snake_case  s   r  c                    s4   t | |dfdd  fdd}t| || dS )z4Enables batch request for the method classname.name.Nc                    s2   t |tu r|dk s||  krtd | |S )Nr   zpiece id is out of range.)r   r  re  r|   )r;   r,  )funcr   r   _func  s    
z_batchnize.<locals>._funcc                    s*   t |tu r fdd|D S  |S )Nc                    s   g | ]} |qS r   r   r+  )r  r   r   r   ru     s    z5_batchnize.<locals>._batched_func.<locals>.<listcomp>rZ  r   )r  rA   r   _batched_func  s   
z!_batchnize.<locals>._batched_func)r   r  )r  r   r  r   )r  r  r   
_batchnize  s   r  rB   )r   r   r   r   r   r   r   )__version__Zsentencepiecepackage_datac                   @   s&   e Zd ZdddZdd Zdd ZdS )	r  Nc                 C   s$   || _ | j d urtj | _d S d S r%   )r  r  stderrfilenoorig_stream_fileno)r   r  r   r   r   rB     s   
z_LogStream.__init__c                 C   s4   | j d urt| j| _t| j  | j d S d S r%   )r  osdupr  orig_stream_dupdup2r  rA   r   r   r   	__enter__  s   
z_LogStream.__enter__c                 C   sD   | j d ur t| j t| j| j t| j | j   d S d S r%   )r  r  closer  r  r  )r   r   r   	tracebackr   r   r   __exit__  s   
z_LogStream.__exit__r%   )r   r   r/   rB   r  r  r   r   r   r   r    s    
r  )>r  r   Z_swig_python_version_info__package__r   r   r   builtinsr	   ImportErrorr   r!   r$   r-   r   r.   objectr2   Z>ImmutableSentencePieceText_ImmutableSentencePiece_swigregisterr\   Z'ImmutableSentencePieceText_swigregisterr   Z,ImmutableNBestSentencePieceText_swigregisterr   Z#SentencePieceProcessor_swigregisterrp  rr  rs  Z!SentencePieceTrainer_swigregisterr  Z$SentencePieceNormalizer_swigregisterr  r  r  r  importlib.resources	importlibior  r  r  r  rB   r   r  r  r  r  ZTokenizerH  Z
DetokenizemZset_random_generator_seedZset_min_log_level_versionr  pathr   rX   	resourcesfilesr  r   r   r   r   <module>   sv   	
7
H
8     
S
O
[