o
    rqi                  	   @   s   d dl Z d dlmZ d dlZd dlZd dlmZ ddlm	Z	 e Z
dd Zdd	ed
ededefddZdddZdd Zdd Zdd ZG dd deZdS )    N)OrderedDict)
get_logger   )ontologyc                 C   sL   t | g}t| d tr$|ttt |  dd | D } t| d ts|S )Nr   c                 S   s   g | ]	}|D ]}|qqS  r   ).0xsxr   r   g/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/modelscope/utils/nlp/space/utils.py
<listcomp>   s    zmax_lens.<locals>.<listcomp>)len
isinstancelistappendmaxmap)XZlensr   r   r
   max_lens   s   
r   int64r   paddingdtypereturnc           	   	   C   s   t | }tj||tjd}t|dkrt| }nCt|dkr6t| D ]\}}t|||d t|f< q#n&t|dkr\t| D ]\}}t|D ]\}}t||||d t|f< qHq@||S )N)r   r         )r   npfullZint32r   array	enumerateZastype)	r   r   r   shaperetir	   r   jr   r   r
   list2np   s   
r"   TFc                 C   s8   ddd}d}|dkr|| |||||\} }|dks| S )Nr   c                 S   s  |  |}|dkr| dfS |t| }|r.|dkr-| |d  r-|d8 }|dkr-| |d  sn|dkr>| |d  dkr>| dfS |ri|t| k rh| |  sR| |  rh|d7 }|t| k rh| |  sR| |  sRn|t| kr| |  s{| |  r| dfS | d | | | |d   |fS )Nr   r    )findr   isalphaisdigit)srtforwardbackwardsidxidxZidx_rr   r   r
   clean_replace_single)   s&   
$$$ z+clean_replace.<locals>.clean_replace_singler#   r   r   )r(   r)   r*   r+   r,   r/   r-   r   r   r
   clean_replace'   s   
r1   c                 C   s
   t | S N)r   r   )r   r   r   r
   py2npC   s   
r3   c                 C   s@   t | d}tj||dd W d    d S 1 sw   Y  d S )Nwr   )indent)openjsondump)fnZdicfr   r   r
   
write_dictG   s   "r;   c                    sv   t  fdd|D }tdt || }tdt  | }||| d  }||| d  }d| | || d  }|S )Nc                    s   g | ]}| v r|qS r   r   )r   r*   
label_listr   r
   r   M   s    zf1_score.<locals>.<listcomp>r   g|=r   )r   r   )r=   Z	pred_listtpfpr9   	precisionZrecallf1r   r<   r
   f1_scoreL   s   rB   c                   @   sz   e Zd ZdddZdd Zdd Zdd	 Zd
d Zdd Zdd Z	dd Z
d ddZdd Zdd Zdd Zd!ddZdS )"MultiWOZVocabr   c                 C   s6   || _ d| _i | _i | _i | _dD ]}| | qdS )z,
        vocab for multiwoz dataset
        r   )z[PAD]z<go_r>z[UNK]z<go_b>z<go_a>z<eos_u>z<eos_r>z<eos_b>z<eos_a>z<go_d>z<eos_d>N)
vocab_sizevocab_size_oov	_idx2word	_word2idx
_freq_dict_absolute_add_word)selfrD   r4   r   r   r
   __init__X   s   zMultiWOZVocab.__init__c                 C   s"   t | j}|| j|< || j|< d S r2   )r   rF   rG   )rJ   r4   r.   r   r   r
   rI   g   s   

z MultiWOZVocab._absolute_add_wordc                 C   s*   || j vr
d| j |< | j |  d7  < d S )Nr   r   rH   rJ   wordr   r   r
   add_wordl   s   

zMultiWOZVocab.add_wordc                 C   s   | j |S r2   )rH   getrM   r   r   r
   has_wordq   s   zMultiWOZVocab.has_wordc                 C   s0   || j vrt| j}|| j|< || j |< d S d S r2   )rG   r   rF   )rJ   rN   r.   r   r   r
   _add_to_vocabt   s
   


zMultiWOZVocab._add_to_vocabc                    s  t  j  fddd}tdt|t j   t|t j  jk r8t	d
t|t j  j tjdg D ]}d| d } | q>tjD ]}d| d } | qOtjD ]} | q`|D ]}|d	r{|dr{ | qj|D ]} | q~t j _d S )
Nc                    s    j |   S r2   rL   )r	   rJ   r   r
   <lambda>|   s    z)MultiWOZVocab.construct.<locals>.<lambda>)key!Vocabulary size including oov: %dz4actual label set smaller than that configured: {}/{}Zgeneral[]z[value_)sortedrH   keysloggerinfor   rF   rD   loggingwarningformatr   Zall_domainsrR   Zall_actsZ	all_slots
startswithendswithrE   )rJ   Zfreq_dict_sortedrN   r   rS   r
   	constructz   s8   


zMultiWOZVocab.constructc                 C   s   t t|d ddd | _t t|d ddd | _i | _| j D ]	\}}|| j|< q&t| j| _	t
d| d  t
d| j	  d S )	N
.freq.jsonr)   zutf-8)encoding.word2idx.jsonzvocab file loaded from ""rV   )r7   loadsr6   readrH   rG   rF   itemsr   rE   r[   r\   )rJ   
vocab_pathr4   r.   r   r   r
   
load_vocab   s   
zMultiWOZVocab.load_vocabc                 C   s>   t t| j dd dd}t|d | j t|d | d S )Nc                 S   s   | d S )Nr   r   )kvr   r   r
   rT      s    z*MultiWOZVocab.save_vocab.<locals>.<lambda>T)rU   reversere   rc   )r   rY   rH   ri   r;   rG   )rJ   rj   rH   r   r   r
   
save_vocab   s   zMultiWOZVocab.save_vocabTc                 C   sH   |r| j |d d u rtd| | j | S || j vrdn|}| j | S )Nz6Unknown word: %s. Vocabulary should include oovs here.z<unk>)rG   rP   
ValueError)rJ   rN   Zinclude_oovr   r   r
   encode   s   

zMultiWOZVocab.encodec                        fdd|D S )Nc                       g | ]}  |qS r   )rp   r   _rS   r   r
   r          z1MultiWOZVocab.sentence_encode.<locals>.<listcomp>r   )rJ   Z	word_listr   rS   r
   sentence_encode      zMultiWOZVocab.sentence_encodec                 C   s   || j krdS |S )Nr   )rD   )rJ   r.   r   r   r
   oov_idx_map   rw   zMultiWOZVocab.oov_idx_mapc                    rq   )Nc                    rr   r   )rx   rs   rS   r   r
   r      ru   z2MultiWOZVocab.sentence_oov_map.<locals>.<listcomp>r   )rJ   Z
index_listr   rS   r
   sentence_oov_map   rw   zMultiWOZVocab.sentence_oov_mapFc                 C   s>   | j |std| |r|| jk r| j | S | j | d S )Nz3Error idx: %d. Vocabulary should include oovs here.z(o))rF   rP   ro   rD   )rJ   r.   Zindicate_oovr   r   r
   decode   s   
zMultiWOZVocab.decodeNr0   )T)F)__name__
__module____qualname__rK   rI   rO   rQ   rR   rb   rk   rn   rp   rv   rx   ry   rz   r   r   r   r
   rC   V   s    

rC   )r   r   )TF)r]   collectionsr   r7   numpyr   Zmodelscope.utils.loggerr    r   r[   r   objectr"   r1   r3   r;   rB   rC   r   r   r   r
   <module>   s   

