o
    W+ i                     @   s  d dl Z d dlZd dlmZ d dlZd dlZd dlmZ d dl	m  m
Z d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ dd	lmZmZmZ dd
lmZ ddlmZ ejej ej!dG dd deZ"G dd dej#Z$G dd de%Z&		dddZ'		dddZ(dS )    N)Dict)Models)
TorchModel)Tensor)MODELS)	ModelFileTasks   )DenseEncoderMappingDecoderPhaseDecoder)ScheduledFloat)Zipformer2DualPathEncoder)module_namec                       sD   e Zd Zdef fddZdeeef deeef fddZ  ZS )ZipenhancerDecorator	model_dirc                    s   t  j|g|R i | t|d |d |d |d |d d}t|}t|| _tj|t	j
}tj|r]tj|tdd}t|trSd	|v rS| |d	  d S | j|d
  d S d S )Nnum_tsconformersdense_channelformer_confbatch_firstmodel_num_spks)r   r   r   r   r   cpu)Zmap_locationZ
state_dict	generator)super__init__dictAttrDictZipEnhancermodelospathjoinr   ZTORCH_MODEL_BIN_FILEexiststorchloaddevice
isinstanceZload_state_dict)selfr   argskwargshZmodel_bin_file
checkpoint	__class__ s/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/modelscope/models/audio/ans/zipenhancer.pyr      s*   	

zZipenhancerDecorator.__init__inputsreturnc              	   C   s   d}d}d}|d }t |jd t |d  }|| }t||||ddd\}}	}
| j||	\}}}}}t|||||ddd}|| }d	|i}|S )
Ni  d   Znoisyr	   g       @333333?T)compress_factorcenterZwav_l2)r#   sqrtshapesummag_pha_stftr   forwardmag_pha_istft)r'   r0   n_ffthop_sizewin_sizeZ	noisy_wavZnorm_factorZnoisy_audiomagphacomZamp_gZpha_gZcom_g_otherswavoutputr.   r.   r/   r:   :   s<   	zZipenhancerDecorator.forward)	__name__
__module____qualname__strr   r   r   r:   __classcell__r.   r.   r,   r/   r      s    *r   c                       s$   e Zd Z fddZdd Z  ZS )r   c                    sp   t t|   || _|j}|| _t|dd| _td	dt	ddd|j
| _t||jd| _t||jd| _dS )
z
        Initialize the ZipEnhancer module.

        Args:
        h (object): Configuration object containing various hyperparameters and settings.
        having num_tsconformers, former_name, former_conf, mask_decoder_type, ...
           )Z
in_channelr	   )g        r3   )g     @g?)Zoutput_downsampling_factorZdropout)Zout_channelNr.   )r   r   r   r*   r   Znum_tscblocksr
   dense_encoderr   r   r   TSConformerr   r   mask_decoderr   phase_decoder)r'   r*   r   r,   r.   r/   r   _   s   zZipEnhancer.__init__c           
      C   s  t  }|ddddd}|ddddd}tj||fdd}| |}| |}| |}| |}|dddddddf ddddd	d}|dddddddf ddddd	d}tj
|t| |t| fdd}	|||	d|fS )a  
        Forward pass of the ZipEnhancer module.

        Args:
        noisy_mag (Tensor): Noisy magnitude input tensor of shape [B, F, T].
        noisy_pha (Tensor): Noisy phase input tensor of shape [B, F, T].

        Returns:
        Tuple: denoised magnitude, denoised phase, denoised complex representation,
               (optional) predicted noise components, and other auxiliary information.
        r      rK   r	   dimN)r   Z	unsqueezeZpermuter#   catrL   rM   rN   rO   Zsqueezestackcossin)
r'   Z	noisy_magZ	noisy_pharC   xZpred_magZpred_phaZdenoised_magZdenoised_phaZdenoised_comr.   r.   r/   r:   v   s0   



((zZipEnhancer.forward)rF   rG   rH   r   r:   rJ   r.   r.   r,   r/   r   ]   s    r   c                       s   e Zd Z fddZ  ZS )r   c                    s    t t| j|i | | | _d S )N)r   r   r   __dict__)r'   r(   r)   r,   r.   r/   r      s   
zAttrDict.__init__)rF   rG   rH   r   rJ   r.   r.   r,   r/   r      s    r         ?Tc                 C   s   t j|| jd}t j| |||||dddd	}t |}t |ddd }t |d d d d d d d	f |d d d d d d d
f d }	t ||}t j	|t 
|	 |t |	 fdd}
||	|
fS )Nr%   ZreflectFT)
hop_length
win_lengthwindowr5   Zpad_mode
normalizedZreturn_complexrK   rP   g&.>r	   r   gh㈵>rR   )r#   hann_windowr%   ZstftZview_as_realr6   powr8   atan2rU   rV   rW   )yr<   r=   r>   r4   r5   r`   Z	stft_specr?   r@   rA   r.   r.   r/   r9      s$   

@&
r9   c           
      C   sZ   t | d| } t | t | | t | }t j||jd}t j||||||d}	|	S )NrZ   r[   )r\   r]   r^   r5   )r#   ra   complexrV   rW   r`   r%   Zistft)
r?   r@   r<   r=   r>   r4   r5   rA   r`   rD   r.   r.   r/   r;      s    r;   )rZ   T))r   randomtypingr   numpynpr#   Ztorch.nnnnZtorch.nn.functionalZ
functionalFZmodelscope.metainfor   Zmodelscope.modelsr   Zmodelscope.models.baser   Zmodelscope.models.builderr   Zmodelscope.utils.constantr   r   Zzipenhancer_layers.generatorr
   r   r   Zzipenhancer_layers.scalingr   Z$zipenhancer_layers.zipenhancer_layerr   Zregister_moduleZacoustic_noise_suppressionZ)speech_zipenhancer_ans_multiloss_16k_baser   Moduler   r   r   r9   r;   r.   r.   r.   r/   <module>   s8   A@
 