o
    W+ i                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ ddlmZ ddlmZ eje
jdG dd deZdS )    N)Union)DeepSpeedEngine)mpu)nn)Trainers)
TorchModel)DistributedPlug)BertLayerNorm)TextGenerator)ModeKeys   )TRAINERS)NlpEpochBasedTrainer)module_namec                   @   sd   e Zd Zdeejef fddZdeejef fddZdd Z	dd	 Z
d
d Zdd Zdd ZdS )PlugTrainerreturnc                 C   sb   t tjdd}tjdd}tjdd}t| j|f||d| jj}| j| |j_|jS )NZ
LOCAL_RANKZMASTER_ADDRz	127.0.0.1ZMASTER_PORTZ29500)	master_ipmaster_port)	intosenvirongetr   Z	model_dircfgmodelunwrap_module)selfZrankr   r   r    r   p/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/modelscope/trainers/nlp/plug_trainer.pybuild_model   s   zPlugTrainer.build_modelc                 C   s   ddl m} ||S )Nr   )DistributedDataParallel)Z modelscope.utils.nlp.distributedr    )r   r   ZDDPr   r   r   to_parallel#   s   zPlugTrainer.to_parallelc                 C   s   dg i}g dd}|  D ]?}t|ttjjfr*|d dd t|j	 D  q|d dd t|j
 D  |d dd t|j
 D  q||fS )Nparams        )r"   weight_decayc                 S   s   g | ]}|d ur|qS )Nr   ).0pr   r   r   
<listcomp>-   s
    zIPlugTrainer._get_params_for_weight_decay_optimization.<locals>.<listcomp>c                 S   s4   g | ]\}}|d urd|vrd|vr|dkr|qS )NZ
mask_scoremaskbiasr   r%   nr&   r   r   r   r'   2   s    c                 S   s$   g | ]\}}|d ur|dkr|qS )Nr)   r   r*   r   r   r   r'   7   s
    )modules
isinstancer	   torchr   Z	LayerNormextendlist_parametersvaluesitems)r   moduleZweight_decay_paramsZno_weight_decay_paramsZmodule_r   r   r   )_get_params_for_weight_decay_optimization'   s   


z5PlugTrainer._get_params_for_weight_decay_optimizationc                 C   sV  | j \}}| jjdd }|d ur|di }ddlm} | j}|jjj	j
}|jjj	jj}|jjjj}	g }
|
t| |7 }
|
t| |7 }
|
t| |	7 }
|
D ]}|d D ]
}t|dsbd|_qXqR||
|j|jd}| jjd	d }|d ur|d usJ |di }dd
lm} | j}|||j|j| ||jdd}|| _|| _| j| j||fS )N	optimizeroptionsr   )DeepSpeedCPUAdamr"   model_parallelF)lrr$   lr_scheduler)AnnealingLRr   )Zstart_lrZwarmup_iter	num_itersdecay_styleZ	last_iter)Z
optimizersr   trainr   popZdeepspeed.ops.adamr8   r   r4   Zbert
embeddingsencoderlayerdecoderr0   r5   hasattrr9   r:   r$   Z&modelscope.models.nlp.plug.AnnealingLRr<   Z	max_itersZwarmupr>   r6   r;   )r   r6   r;   Zoptimizer_cfgZoptim_optionsr8   r   rA   ZlayersZ
dec_layersZparam_groupsZparam_groupparamZlr_scheduler_cfgZ
lr_optionsr<   r=   r   r   r   create_optimizer_and_scheduler>   s^   

z*PlugTrainer.create_optimizer_and_schedulerc           	      C   s   |  \}}d}ttj|||f|jd|d||}tj|  tj|jd}d|||k< tj|tj|jd}|	d
|}|||fS )N   )device)ZdtyperI   r#   r   )sizer.   ZtrilZonesrI   viewfloatZarangelongZ	unsqueezeZ	expand_as)	r   dataZ	eod_token
batch_sizeZ
seq_lengthZatt_mask_batchattention_maskZ	loss_maskposition_idsr   r   r   _get_masks_and_position_idsn   s(   

z'PlugTrainer._get_masks_and_position_idsc              	   C   s   t j| _t| jjdd}|d d d d df  }|d d d dd f  }| |d\}}}t| jjdd r=| }||d d |d	 ||||d
\}	}
t	
|
  |}|d}t|d| |  }d|i| _| j| j d S )Ncheckpoint_activationsTlabelsr   rH   r   Zfp16	input_idsrP   )rS   loss)r   ZTRAIN_modegetattrr   r?   
contiguousrR   Zhalfr   Zvocab_parallel_cross_entropyrL   rK   r.   sumZtrain_outputsZ
log_bufferupdate)r   r   ZinputsrS   Z
tgt_tokensZ
tgt_labelsZtgt_attention_maskZdec_loss_maskrQ   _outputZlossesrV   r   r   r   
train_step   s6   

	

zPlugTrainer.train_stepc                 C   sr  t | jtr| jj}n| j}|  | | jjj}|d jd }t	|| j
jd }t  |d  }|d  }|d  }|d d dd f  }	|d |g}
||
}|d }|	   }g |d< g |d< t|D ]8}|| d }d	|||d k< |   }| j
j|| d
d}| j
j|d
d}|d | |d | qnW d    |S 1 sw   Y  |S )NrU   r   rP   rT   rH   ZpredictionspredsZtgtsd   T)Zskip_special_tokens)r-   r   r   r4   evalr   configZoriginal_vocab_sizeshaper
   Zeval_preprocessorZnlp_tokenizerr.   Zno_gradrM   byterY   Ztranslate_batchcpunumpytolistrangedecodeappend)r   rN   r   Z
vocab_sizerO   Zbeam_generatortokensZpadding_maskZ
target_idsZtarget_labelsZencoder_inputsresultZ	pred_listZtarget_listiZpred_idsZgold_stringZpred_stringr   r   r   evaluation_step   sL   




zPlugTrainer.evaluation_stepN)__name__
__module____qualname__r   r   Moduler   r   r!   r5   rG   rR   r^   rn   r   r   r   r   r      s    0r   )r   typingr   r.   Z	deepspeedr   Zmegatron_utilr   r   Zmodelscope.metainfor   Zmodelscope.models.baser   Zmodelscope.models.nlp.plugr   Z#modelscope.models.nlp.plug.backboner	   Z$modelscope.models.nlp.plug.generatorr
   Zmodelscope.utils.constantr   baser   Znlp_trainerr   Zregister_moduleZnlp_plug_trainerr   r   r   r   r   <module>   s     