o
    rqij*                     @   s4  d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	m
Z
 d dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ e/ Z0ej1ej2dG dd deZ3dS )    N)CallableDictListOptionalTupleUnion)PreprocessorsTrainers)Model)SambertHifigan)	MsDataset)build_preprocessor)BaseTrainer)TRAINERS)TtsTrainType) TtsTrainingCfgNotExistsException"TtsTrainingDatasetInvalidException"TtsTrainingHparamsInvalidException TtsTrainingInvalidModelException$TtsTrainingWorkDirNotExistsException)Config)DEFAULT_DATASET_NAMESPACEDEFAULT_DATASET_REVISIONDEFAULT_MODEL_REVISION	ModelFileTasksTrainerStages)	to_device)
get_logger)module_namec                   @   s   e Zd ZdZdZdZdZdddddeee	j
i e	ji idef
d	eeef d
ededededeeef dedededee fddZdd ZdefddZdd Zdd Zdd Zd d! Zd"ed#eeef fd$d%ZdS )&KanttsTrainerdataZtmp_amZtmp_vocZ
orig_modelNZF7ZPinYinFmodelwork_dirspeaker	lang_typecfg_filetrain_datasettrain_dataset_namespacetrain_dataset_revision
train_typemodel_revisionc                 K   s,  |st  j| _tj| jst| j n|| _tj| js(t| j dt	 | _
t|	t	rN|	 D ]\}}|tjksH|tjksH|tjkrM|| j
|< q5t| j
dkrftd i | j
tj< i | j
tj< td| j  tj| j| j| _tj| j| j| _tj| j| j| _tj| j| j| _d| _|
| _d| _d| _ d| _!t"j#| jdd t"j#| jdd t"j#| jdd t"j#| jdd t| j t| j t| j |rt|t$rtj|rtd|  || _n)td| d	|  t%j&|||d
}td|j'  | (|| _n| (|| _|s#t)dt|t$r0| *||}n|j+}t",|| j | j| _+|sJtj| j+t-j.}| /| tj| js[t0dd| _1|| _2d | _3|4dd| _5| 6| j+| j2| _3| j3j7| _7tj| j
v stj| j
v rt8t	t9j:dt;j<| _=d S d S )Nz not existsr   z,train type empty, default to sambert and voczSet workdir to  T)ignore_errorszload /Zdataset_name	namespaceversionztrain dataset:zmodel param is nonezdataset raw path not existsFdeviceZgpu)type)>tempfileTemporaryDirectorynamer#   ospathexistsmakedirsr   dictr*   
isinstanceitemsr   TRAIN_TYPE_SAMBERTTRAIN_TYPE_VOCTRAIN_TYPE_BERTlenloggerinfojoinDATA_DIRdata_dir
AM_TMP_DIR
am_tmp_dirVOC_TMP_DIRvoc_tmp_dirORIG_MODEL_DIRZorig_model_dirraw_dataset_pathskip_scriptaudio_config_patham_config_pathvoc_config_pathshutilrmtreestrr   loadconfig_kwargsload_dataset_raw_pathr   Zget_or_download_model_dir	model_dircopytreer   CONFIGURATION	parse_cfgr   Zfinetune_from_pretrainr$   r"   getr2   	get_modelr%   r   r   Zkantts_data_preprocessorr   Ztext_to_speechaudio_data_preprocessor)selfr"   r#   r$   r%   r&   r'   r(   r)   r*   Zpreprocess_skip_scriptr+   kwargskvrW    rb   l/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/modelscope/trainers/audio/tts_trainer.py__init__(   s   










zKanttsTrainer.__init__c                 C   s  t j|}t|ddd}t|}d|vrtdd|d v r5t j||d d }t j|r5|| _	d|d v rOt j||d d }t j|rO|| _
d|d v rit j||d d }t j|ri|| _| jsd	|d v r|d d	 }t j|r|| _nBd
|v r|dt}	|dt}
tj|d
 |	|
d}| || _n1d|v r|d | _W d    d S W d    d S W d    d S W d    d S W d    d S W d    d S 1 sw   Y  d S )Nrzutf-8)encodingtrainzmodel not support finetuneaudio_config	am_config
voc_configr'   idr0   revisionr/   r8   )r7   r8   dirnameopenjsonrT   r   rD   r9   rN   rO   rP   rL   r[   r   r   r   rV   )r^   r&   Zcur_dirfconfigrh   ri   rj   datasetr0   rl   msrb   rb   rc   rZ      sp   


$""zKanttsTrainer.parse_cfgrr   c                 C   s6   d|j vr	tdd|j d vrtd|j d d S )NZsplit_configz'split_config not found in config_kwargsrg   zno train split in split_config)rU   r   )r^   rr   rb   rb   rc   rV      s   
z#KanttsTrainer.load_dataset_raw_pathc              	   C   s`   | j r.| j}|rtj|s| j| j}| j| j}|  | j	| j
|| j| j| j| d S d S N)r]   rN   r7   r8   r9   r"   Zget_voice_audio_config_pathr$   Zget_voice_se_model_pathrL   rF   r%   rM   )r^   rh   Zse_modelrb   rb   rc   prepare_data   s   zKanttsTrainer.prepare_datac                 C   s   d S rt   rb   )r^   rb   rb   rc   prepare_text   s   zKanttsTrainer.prepare_textc                 C   s>   t tj| jtj}|di }t	d| jdd|}|S )Nr"   T)rW   Zis_trainrb   )
r   	from_filer7   r8   rD   rW   r   rY   r[   r   )r^   rW   r$   cfgZ	model_cfgr"   rb   rb   rc   r\      s   zKanttsTrainer.get_modelc                 O   s   | j stdd}d|v r|d }tj| jv stj| jv r!|   tj| jv r+|   | j	| j
| j| jd}| j| jd}| j | j|| j|| d S )Nzmodel is noneFignore_pretrain)r#   rH   rJ   rF   )ri   rj   )r"   r   r   r>   r*   r?   ru   r@   rv   r#   rH   rJ   rF   rO   rP   rg   r$   )r^   argsr_   ry   Zdir_dictZconfig_dictrb   rb   rc   rg      s(   zKanttsTrainer.traincheckpoint_pathreturnc                 O   s   i S rt   rb   )r^   r{   rz   r_   rb   rb   rc   evaluate   s   zKanttsTrainer.evaluate)__name__
__module____qualname__rE   rG   rI   rK   r   r   r   r>   r?   r   r   r
   rS   r   r;   r   rd   rZ   rV   ru   rv   r\   rg   r   floatr}   rb   rb   rb   rc   r    !   s^    

	
l)	
r    )4r7   rQ   r4   zipfiletypingr   r   r   r   r   r   ro   Zmodelscope.metainfor   r	   Zmodelscope.modelsr
   Zmodelscope.models.audio.ttsr   Zmodelscope.msdatasetsr   Z modelscope.preprocessors.builderr   Zmodelscope.trainers.baser   Zmodelscope.trainers.builderr   Z"modelscope.utils.audio.audio_utilsr   Z%modelscope.utils.audio.tts_exceptionsr   r   r   r   r   Zmodelscope.utils.configr   Zmodelscope.utils.constantr   r   r   r   r   r   Zmodelscope.utils.data_utilsr   Zmodelscope.utils.loggerr   rB   Zregister_moduleZspeech_kantts_trainerr    rb   rb   rb   rc   <module>   s,     