o
    rqi                     @   s   d dl Z d dlmZ d dlmZmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ ejejdG dd deZdS )    N)deepcopy)AnyDictListUnion)nn)Trainers)Model
TorchModel)GPT3ForTextGeneration)TRAINERS)NlpEpochBasedTrainer)build_parallel)Config)is_megatron_initialized)module_namec                       s   e Zd Zdef fddZdeejef fddZ	dd Z
d	d
 Zdedeeef deeef fddZdedeeef deeef fddZdefddZ  ZS )GPT3Trainercfgc                    s&   t  |}ttjdd|j_|S )NZRANKr   )superrebuild_configintosenvirongetmodelZrank)selfr   	__class__ k/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/modelscope/trainers/nlp/gpt3_trainer.pyr      s   zGPT3Trainer.rebuild_configreturnc                 C   s   | j dd d ur!t| j d }|t|tj gd t|S td|dtj gd}t	 rCddl
m} |tj | d t|S )	NZparallel)module
device_idsZDistributedDataParallelT)typer!   Zfind_unused_parametersr"   r   )mpu)Zoutput_deviceZprocess_group)r   r   r   updatedicttorchcudaZcurrent_devicer   r   Zmegatron_utilr$   Zget_data_parallel_group)r   r   Zdp_cfgr$   r   r   r   to_parallel   s&   
zGPT3Trainer.to_parallelc                 C   s   | j j}|| S N)Zeval_preprocessor	tokenizerZ
detokenizetolist)r   tokensr+   r   r   r   _decode3   s   zGPT3Trainer._decodec                 C   s<   | j r| jjn| j}|  d|v r| ||S | ||S )NZ
inputs_len)_distr   r!   eval_generate_eval_forward_eval)r   datar   r   r   r   evaluation_step7   s
   zGPT3Trainer.evaluation_stepr   r3   c                    sd   |j ddd ||}|d } fddt|d |D |d<  fd	dt|d
 |D |d< |S )N   g        )Ztop_kZtop_pprompts_lenc                    s"   g | ]\}}  ||d  qS r*   r.   .0seqZskip_lenr   r   r   
<listcomp>G   s    z.GPT3Trainer._generate_eval.<locals>.<listcomp>	sequencespredsc                    s&   g | ]\}}  ||d  d qS )r5   Nr7   r8   r;   r   r   r<   K   s    labelsZtgts)r%   generatezip)r   r   r3   resultr6   r   r;   r   r1   @   s   




zGPT3Trainer._generate_evalc                 C   s
   | |S r*   )forward)r   r   r3   r   r   r   r2   Q   s   
zGPT3Trainer._forward_evalc                 C   s   t j| j| j| jjdS )N)Zcfg_dictZmegatron_cfg)r	   Zfrom_pretrainedZ	model_dirr   Zmegatronr;   r   r   r   build_modelU   s   zGPT3Trainer.build_model)__name__
__module____qualname__r   r   r   r   Moduler
   r)   r.   r4   r   r   strr   r1   r2   rD   __classcell__r   r   r   r   r      s     	





r   )r   copyr   typingr   r   r   r   r'   r   Zmodelscope.metainfor   Zmodelscope.models.baser	   r
   Zmodelscope.models.nlpr   Zmodelscope.trainers.builderr   Zmodelscope.trainers.nlp_trainerr   Z$modelscope.trainers.parallel.builderr   Zmodelscope.utils.configr   Zmodelscope.utils.megatron_utilsr   Zregister_moduleZgpt3_trainerr   r   r   r   r   <module>   s   