o
    W+ i4                     @   s   d dl Z d dlZd dlZd dlmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d d	lmZ e Zejejd
G dd deZdS )    N)DictOptionalUnion)build_trainer)Trainers)	MsDataset)BaseTrainer)TRAINERS)DEFAULT_DATASET_NAMESPACEDEFAULT_DATASET_REVISIONDEFAULT_MODEL_REVISION	ModelFileTasksTrainerStages)
get_logger)module_namec                   @   s   e Zd ZdZddddeddddf	dedededed	eee	ef  d
ee dee
 dee
 dee dee fddZdd Zdd ZdddZdd Zdedeeef fddZdS )
ASRTrainerdataNFZsmallmodelwork_dirdistributeddataset_typedata_dirmodel_revision
batch_bins	max_epochlrmate_paramsc                 K   sB  |st  j| _tj| jst| j n|| _tj| js(t| j dt	
d| j  tj| j| j| _d| _|| _|| _tj| jdd tj| jdd tj|r]|}n| ||}|| _tj| jd| _| | j| _d|vr| || j\| _| _n|d | _tj| j| j| j| j| j|||	|
d		| _d
S )a  ASR Trainer.

        Args:
            model (str) : model name
            work_dir (str): output dir for saving results
            distributed (bool): whether to enable DDP training
            dataset_type (str): choose which dataset type to use
            data_dir (str): the path of data
            model_revision (str): set model version
            batch_bins (str): batch size
            max_epoch (int): the maximum epoch number for training
            lr (float): learning rate
            mate_params (dict): for saving other training args
        Examples:

        >>> import os
        >>> from modelscope.metainfo import Trainers
        >>> from modelscope.msdatasets import MsDataset
        >>> from modelscope.trainers import build_trainer
        >>> ds_dict = MsDataset.load('speech_asr_aishell1_trainsets')
        >>> kwargs = dict(
        >>>     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
        >>>     data_dir=ds_dict,
        >>>     work_dir="./checkpoint")
        >>> trainer = build_trainer(
        >>>     Trainers.speech_asr_trainer, default_args=kwargs)
        >>> trainer.train()

        z not existszSet workdir to  Tignore_errorsexist_okzconfiguration.jsonZraw_data_dir)	Zmodelscope_dictr   
output_dirr   r   r   r   r   r   N)tempfileTemporaryDirectorynamer   ospathexistsmakedirs	ExceptionloggerinfojoinDATA_DIRr   Zraw_dataset_pathr   r   shutilrmtreeZget_or_download_model_dir	model_dirZ	model_cfg	parse_cfgcfg_dictload_dataset_raw_pathtrain_data_dirdev_data_dirr   trainer)selfr   r   r   r   r   r   r   r   r   r   kwargsr2    r;   q/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/modelscope/trainers/audio/asr_trainer.py__init__   sJ   )
zASRTrainer.__init__c                 C   sj  t j|}t }t|ddd}t|}|d d d |d< ||d< t j||d d |d	< t j||d d d
 |d
< t j|d|d< t j||d d d |d< t j|d|d< d|d d v ryt j||d d d |d< nd |d< d|d d v rt j||d d d |d< n|d	 |d< W d    |S W d    |S 1 sw   Y  |S )Nrzutf-8)encodingr   Zmodel_configmoder2   Zam_model_nameZam_model_fileZam_model_configzfinetune.yamlZfinetune_configZmvn_fileZ	cmvn_fileZseg_dictZbpemodelZ
init_model)r'   r(   dirnamedictopenjsonloadr.   )r9   Zcfg_fileZcur_dirr4   fconfigr;   r;   r<   r3   q   sH   




zASRTrainer.parse_cfgc                 C   sT   d|vrt d|| j||dd}d|vrt d|| j||dd}||fS )Ntrainz*dataset {0} does not contain a train split)splitZ
validationz(dataset {0} does not contain a dev split)r+   formatprepare_data)r9   datasetZoutput_data_dirr6   r7   r;   r;   r<   r5      s   z ASRTrainer.load_dataset_raw_pathrH   c                 C   s   t j||}tj|dd t j|dd t|| }tt j|dd}tt j|dd}t|D ]0}|| | }	|	d }
|	d }|	d	t j
|
|
gd
  |	d	t j
|
|gd
  q3|  |  |S )NTr   r!   zwav.scpwtextz
Audio:FILEz
Text:LABEL	
)r'   r(   r.   r0   r1   r*   lenrC   rangewritebasenameclose)r9   rL   Zout_base_dirrI   Zout_dirZdata_cntZ
fp_wav_scpZfp_texticontentZwav_filerN   r;   r;   r<   rK      s"   "zASRTrainer.prepare_datac                 O   s   | j   d S N)r8   run)r9   argsr:   r;   r;   r<   rH      s   zASRTrainer.traincheckpoint_pathreturnc                 O   s   t rX   )NotImplementedError)r9   r[   rZ   r:   r;   r;   r<   evaluate   s   zASRTrainer.evaluate)rH   )__name__
__module____qualname__r/   r   strboolr   r   r   intfloatrB   r=   r3   r5   rK   rH   r   r^   r;   r;   r;   r<   r      sN    	

V

r   )r'   r0   r$   typingr   r   r   rD   Z
funasr.binr   Zmodelscope.metainfor   Zmodelscope.msdatasetsr   Zmodelscope.trainers.baser   Zmodelscope.trainers.builderr	   Zmodelscope.utils.constantr
   r   r   r   r   r   Zmodelscope.utils.loggerr   r,   Zregister_moduleZspeech_asr_trainerr   r;   r;   r;   r<   <module>   s    