o
    W+ i                     @   s  d dl mZ d dlmZmZ d dlZd dlZd dl	Z
d dlmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dl m!Z! e
j"dkrie
j#j$Z
e
%  e! Z&dgZ'ej(ej)ej*dG dd deZ+dS )    N)AnyDict)MosesDetokenizerMosesPunctNormalizerMosesTokenizer)	apply_bpe)	Pipelines)Model)
OutputKeys)Pipeline)	PIPELINES)Config)	ModelFileTasks)
get_loggerz2.0TranslationPipeline)module_namec                       s   e Zd Zdef fddZdedeeef fddZdeeef deeef fdd	Z	d
eeef deeef fddZ
  ZS )r   modelc                    s8  t  jdd|i| t| jtsJ dtj | jj}t	  t
t
|tjd| _tt
|tj| _t
|| jd d d | _tdd tt| jd	d
D | _t
|| jd d d | _tdd tt| jd	d
D | _tjdd}d|j_tj|d| _tjtjddgdd| _i | _ | jd d | _!| jd d | _"t
|| jd d d | _#| j!dkrt$| _%nt&| j!d| _'t(| j!d| _%t)| j"d| _*t+,t| j#d	d
| _-| | j}| j .| | j/ #}t01d| j  tj23t4 | _5| j56|| j W d   dS 1 sw   Y  dS )zBuild a translation pipeline with a model dir or a model id in the model hub.

        Args:
            model: A Model instance.
        r   z,please check whether model config exists in zckpt-0ZdatasetZ	src_vocabfilec                 S   s   g | ]
\}}|  |fqS  strip.0iwr   r   y/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/modelscope/pipelines/nlp/translation_pipeline.py
<listcomp>6       z0TranslationPipeline.__init__.<locals>.<listcomp>zutf-8)encodingZ	trg_vocabc                 S   s   g | ]
\}}||  fqS r   r   r   r   r   r   r   :   r   T)Zallow_soft_placement)configN
input_wids)ZdtypeshapenameZpreprocessorZsrc_langZtgt_langZsrc_bpezh)langzloading model from r   )7super__init__
isinstancer   r	   r   ZCONFIGURATIONZ	model_dirtfZreset_default_graphospjoinZTF_CHECKPOINT_FOLDERZ
model_pathr   	from_filecfgZ_src_vocab_pathdict	enumerateopen
_src_vocabZ_trg_vocab_path_trg_rvocabZConfigProtoZgpu_optionsZallow_growthSession_sessionplaceholderZint64r!   output	_src_lang	_tgt_langZ_src_bpe_pathjieba_tokr   _punct_normalizerr   r   _detokr   ZBPE_bpeupdate
as_defaultloggerinfotrainZSaverZglobal_variablesZmodel_loaderrestore)selfr   kwargsZ	tf_configr6   sess	__class__r   r   r'   "   s^   



$zTranslationPipeline.__init__inputreturnc                    s   | d}jdkrfdd|D }dd |D }n+fdd|D }djdv r0jd	ks:jd	kr<jdv r<d
fdd|D }fdd|D }tdd |D  t fdd|D }d|i}|S )N<SENT_SPLIT>r$   c                       g | ]} j |qS r   )r:   cutr   itemrD   r   r   r   b       z2TranslationPipeline.preprocess.<locals>.<listcomp>c                 S   s   g | ]	}d  t|qS ) )r+   listrN   r   r   r   r   c   s    c                    rL   r   )r;   	normalizerN   rP   r   r   r   e   rQ   T)esfrenFc                    s   g | ]}j j|d  dqS )T)Z
return_straggressive_dash_splits)r:   tokenizerN   )rX   rD   r   r   r   j   s    c                    s    g | ]} j |  qS r   )r=   Zprocess_liner   splitrN   rP   r   r   r   r   s    c                 S   s   g | ]}t |qS r   lenrN   r   r   r   r   u   s    c                    s0   g | ]}fd d|D dg t |   qS )c                    s2   g | ]}| j v r j | n jd  d d qS )r   Zsrc_vocab_size   )r1   r-   )r   r   rP   r   r   r   v   s
    z=TranslationPipeline.preprocess.<locals>.<listcomp>.<listcomp>r   r[   rN   )
MAX_LENGTHrD   r   r   r   v   s    
	input_ids)rZ   r7   r8   maxnparray)rD   rI   Z	input_tokZ	input_bper_   resultr   )r^   rX   rD   r   
preprocess^   s*   


zTranslationPipeline.preprocessc                 C   sR   | j   | j|d i}| j j| j|d}|W  d    S 1 s"w   Y  d S )Nr_   )	feed_dict)r4   r?   r!   runr6   )rD   rI   re   Zsess_outputsr   r   r   forward}   s
   $zTranslationPipeline.forwardinputsc                    s   |d j \}}}g }t|D ]9}|d | }t|d dg }|d |d }d fdd|D dddd}	| j|		  qd	|}t
j|i}
|
S )
Noutput_seqsr   rR   c                    s$   g | ]}| j v r j | nd qS )z<unk>)r2   )r   ZwidrP   r   r   r      s    z3TranslationPipeline.postprocess.<locals>.<listcomp>z@@  z@@rK   )r"   rangerS   indexr+   replaceappendr<   Z
detokenizerZ   r
   ZTRANSLATION)rD   rh   xyzZtranslation_outr   ri   Zwidstranslationrc   r   rP   r   postprocess   s   

zTranslationPipeline.postprocess)__name__
__module____qualname__r	   r'   strr   r   rd   rg   rs   __classcell__r   r   rG   r   r      s
    <"*),Zos.pathpathr*   typingr   r   r9   numpyra   Z
tensorflowr)   Z
sacremosesr   r   r   Zsubword_nmtr   Zmodelscope.metainfor   Zmodelscope.models.baser	   Zmodelscope.outputsr
   Zmodelscope.pipelines.baser   Zmodelscope.pipelines.builderr   Zmodelscope.utils.configr   Zmodelscope.utils.constantr   r   Zmodelscope.utils.loggerr   __version__compatv1Zdisable_eager_executionr@   __all__Zregister_modulerr   Zcsanmt_translationr   r   r   r   r   <module>   s0   
