o
    W+ i(                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlm  mZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ dd	lmZmZ dd
lmZ ejeje
jdG dd deZG dd dejZdd ZdddZ dS )    N)Dict)Models)
TorchModel)Tensor)MODELS)	ModelFileTasks   )	ConviSTFTConvSTFT)UNet)module_namec                       sH   e Zd ZdZdef fddZdeeef deeef fddZ  Z	S )	FRCRNDecoratorz@ A decorator of FRCRN for integrating into modelscope framework 	model_dirc                    s   t  j|g|R i | t|i || _tj|tj}tj	|rIt
j|t
dd}t|tr?d|v r?| |d  dS | jj|dd dS dS )zzinitialize the frcrn model from the `model_dir` path.

        Args:
            model_dir (str): the model path.
        cpu)Zmap_locationZ
state_dictF)strictN)super__init__FRCRNmodelospathjoinr   ZTORCH_MODEL_BIN_FILEexiststorchloadZdevice
isinstancedictZload_state_dict)selfr   argskwargsZmodel_bin_file
checkpoint	__class__ m/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/modelscope/models/audio/ans/frcrn.pyr      s   
zFRCRNDecorator.__init__inputsreturnc                    s   | j |d }|d |d |d |d |d |d d}d	|v r`| j j|d |d	 |d
d |  | j j|d |d	 |dd|  fdd D |d< |d fddD  |S )Nnoisyr   r	               )Zspec_l1Zwav_l1Zmask_l1Zspec_l2Zwav_l2Zmask_l2cleanMix)modeSiSNRc                       i | ]	}| |   qS r$   item.0k)
mix_resultr$   r%   
<dictcomp>>   s    z*FRCRNDecorator.forward.<locals>.<dictcomp>Zlog_varsc                    r1   r$   r2   r4   )sisnr_resultr$   r%   r8   @   s    )r   forwardlossupdate)r   r&   Zresult_listoutputr$   )r7   r9   r%   r:   ,   s0   


zFRCRNDecorator.forward)
__name__
__module____qualname____doc__strr   r   r   r:   __classcell__r$   r$   r"   r%   r      s    *r   c                       sX   e Zd ZdZ				d fdd	Zdd	 Zd
d ZdddZdddZdddZ	  Z
S )r   z Frequency Recurrent CRN   d      hannc
                    s   t    |d d | _|| _|| _|| _|	| _d}t| j| j| j| jd|d| _t	| j| j| j| jd|d| _
td||||d| _td||||d| _dS )a  
        Args:
            complex: Whether to use complex networks.
            model_complexity: define the model complexity with the number of layers
            model_depth: Only two options are available : 10, 20
            log_amp: Whether to use log amplitude to estimate signals
            padding_mode: Encoder's convolution filter. 'zeros', 'reflect'
            win_len: length of window used for defining one frame of sample points
            win_inc: length of window shifting (equivalent to hop_size)
            fft_len: number of Short Time Fourier Transform (STFT) points
            win_type: windowing type used in STFT, eg. 'hanning', 'hamming'
        r)   r	   Tcomplex)Zfeature_typefix)rH   model_complexitymodel_depthpadding_modeN)r   r   feat_dimwin_lenwin_incfft_lenwin_typer   stftr
   istftr   unetunet2)r   rH   rJ   rK   Zlog_amprL   rN   rO   rP   rQ   r    rI   r"   r$   r%   r   H   sJ   
zFRCRN.__init__c              	   C   s  g }|  |}t|d}t|d d d d d | jd d f |d d d d | jd d d f gd}t|d}t|dd}| |}t|}| |}t|}| 	||\}}	}
|
| |
|	 |
|
 || }| 	||\}}	}
|
| |
|	 |
|
 |S )Nr	   r+   )rR   r   Z	unsqueezecatrM   Z	transposerT   tanhrU   
apply_maskappend)r   r&   out_listcmp_specZ	unet1_outZ	cmp_mask1Z	unet2_outZ	cmp_mask2est_specest_wavest_maskr$   r$   r%   r:      s2   
  









zFRCRN.forwardc                 C   s  t |d d d d d d d d df |d d d d d d d d df  |d d d d d d d d df |d d d d d d d d df   |d d d d d d d d df |d d d d d d d d df  |d d d d d d d d df |d d d d d d d d df   gd}t |d d dd d d d f |d d dd d d d f gd}t |d}t |d d d d d d df |d d d d d d df gd}| |}t |d}|||fS )Nr   r	   )r   rV   squeezerS   )r   r[   cmp_maskr\   r]   r$   r$   r%   rX      s    BBBB@@

zFRCRN.apply_mask        c                 C   sR   g g }}|   D ]\}}d|v r||g7 }q	||g7 }q	||d|ddg}|S )NZbias)paramsweight_decayra   )Znamed_parameters)r   rc   weightsZbiasesnameparamrb   r$   r$   r%   
get_params   s   
zFRCRN.get_paramsr.   c                 C   s  |dkr=d}|t |k r8|| }|d }|| }|d }|| }|d }|dkr2| ||||||}	|t |k st|	dS |dkrd}|t |k r~|| }|d }|| }|d }|| }|d }|dkrx| ||||||\}
}}|
| | }	|t |k sIt|	|
|dS d S )Nr0   r   r	   r*   )Zsisnrr.   )r;   amp_loss
phase_loss)lenloss_1layerr   )r   r(   labelsrZ   r/   countr\   r]   r^   r;   rh   ri   
SiSNR_lossr$   r$   r%   r;      s@   


z
FRCRN.lossc                 C   s
  |dkr"|  dkrt|d}|  dkrt|d}t|| S |dkr|  dkr3t|d}|  dkr?t|d}t|| }| \}}	}
| |}|ddd| jddf }|dd| jdddf }| |}|ddd| jddf }|dd| jdddf }|d |d  }t|| ||  |d  || ||  |d  gd}d||dk< d||d	k < t	|ddd| jddf |ddd| jddf |	 }t	|dd| jdddf |dd| jdddf |	 }|||fS dS )
z Compute the loss by mode
        mode == 'Mix'
            est: [B, F*2, T]
            labels: [B, F*2,T]
        mode == 'SiSNR'
            est: [B, T]
            labels: [B, T]
        r0   r*   r	   r.   Nr)   :0yE>)
dimr   r_   si_snrsizerR   rM   rV   FZmse_loss)r   r(   Zestr]   rl   r`   r/   rn   bdtSZSrZSiYZYrZYiZY_powZgth_maskrh   ri   r$   r$   r%   rk      sN   	



zFRCRN.loss_1layer)rD   rE   rF   rG   )ra   )r.   )r>   r?   r@   rA   r   r:   rX   rg   r;   rk   rC   r$   r$   r"   r%   r   E   s    ; 

r   c                 C   s   t j| | ddd}|S )Nrp   T)Zkeepdim)r   sum)s1s2Znormr$   r$   r%   l2_norm  s   r~   ro   c           
      C   sd   t | |}t ||}|||  | }| | }t ||}t ||}dt|||  |  }	t|	S )N
   )r~   r   log10mean)
r|   r}   ZepsZ
s1_s2_normZ
s2_s2_normZs_targetZe_noiseZtarget_normZ
noise_normZsnrr$   r$   r%   rs     s   




rs   )ro   )!r   typingr   r   Ztorch.nnnnZtorch.nn.functionalZ
functionalru   Zmodelscope.metainfor   Zmodelscope.modelsr   Zmodelscope.models.baser   Zmodelscope.models.builderr   Zmodelscope.utils.constantr   r   Z	conv_stftr
   r   rT   r   Zregister_moduleZacoustic_noise_suppressionZspeech_frcrn_ans_cirm_16kr   Moduler   r~   rs   r$   r$   r$   r%   <module>   s*   0 G