o
    rqi                      @   s   d dl Zd dlZd dlmZ d dlm  mZ G dd dejZ	G dd de	Z
dededed	efd
dZG dd dejZG dd deZG dd dejjZdS )    Nc                       sV   e Zd Z									d fdd	Zd	d
 ZdededefddZdd Z  ZS )Conv1d_ON   sameTreflectFc              
      s   t    || _|| _|| _|| _|
| _d| _|| _|d u r&|d u r&t	d|d u r/| 
|}tj||| j| j| jd||	d| _d S )NFz.Must provide one of input_shape or in_channelsr   )stridedilationpaddinggroupsbias)super__init__kernel_sizer   r   r   padding_mode	unsqueezeskip_transpose
ValueError_check_input_shapennConv1dconv)selfout_channelsr   input_shapein_channelsr   r   r   r	   r
   r   r   	__class__ f/home/app/PaddleOCR-VL/.venv_paddleocr/lib/python3.10/site-packages/modelscope/models/audio/sv/TDNN.pyr      s,   

zConv1d_O.__init__c                 C   s   | j s	|dd}| jr|d}| jdkr"| || j| j| j}n#| jdkr8| jd | j }t	||df}n| jdkr>nt
d| j | |}| jrR|d}| j s[|dd}|S )zReturns the output of the convolution.

        Arguments
        ---------
        x : torch.Tensor (batch, time, channel)
            input to convolve. 2d or 4d tensors are expected.
        r   r   Zcausalr   Zvalidz1Padding must be 'same', 'valid' or 'causal'. Got )r   	transposer   r   _manage_paddingr   r   r   Fpadr   r   Zsqueeze)r   xZnum_padZwxr   r   r   forward3   s0   	





zConv1d_O.forwardr   r   r   c                 C   s.   |j d }t||||}tj||| jd}|S )Nr   )mode)shapeget_padding_elemr!   r"   r   )r   r#   r   r   r   L_inr   r   r   r   r    \   s   
zConv1d_O._manage_paddingc                 C   sr   t |dkrd| _d}n| jr|d }nt |dkr|d }n
tdtt | | jd dkr7td| j |S )zIChecks the input shape and returns the number of input channels.
           Tr      z"conv1d expects 2d, 3d inputs. Got r   z4The field kernel size must be an odd number. Got %s.)lenr   r   r   strr   )r   r&   r   r   r   r   r   n   s"   


zConv1d_O._check_input_shape)	NNr   r   r   r   Tr   F)	__name__
__module____qualname__r   r$   intr    r   __classcell__r   r   r   r   r   	   s(    ()
r   c                          e Zd Z fddZ  ZS )r   c                       t  j|ddi| d S Nr   Tr   r   r   argskwargsr   r   r   r         zConv1d.__init__r-   r.   r/   r   r1   r   r   r   r   r          r   r(   r   r   r   c                 C   s   |dkr%t | ||  | d }||d  ||  }|d |d g}|S | ||d   d | d }| | d | | d g}|S )zThis function computes the number of elements to add for zero-padding.

    Arguments
    ---------
    L_in : int
    stride: int
    kernel_size : int
    dilation : int
    r   r)   )mathceil)r(   r   r   r   Zn_stepsZL_outr   r   r   r   r'      s   
r'   c                       s6   e Zd Z								d
 fdd	Zdd	 Z  ZS )BatchNorm1d_ONh㈵>皙?TFc	           	         sV   t    || _|| _|d u r|r|d }n|d u r|d }tj|||||d| _d S )Nr   r   )epsmomentumaffinetrack_running_stats)r   r   combine_batch_timer   r   BatchNorm1dnorm)	r   r   
input_sizerA   rB   rC   rD   rE   r   r   r   r   r      s   

zBatchNorm1d_O.__init__c                 C   s   |j }| jr,|jdkr||d |d  |d }n||d |d  |d |d }n	| js5|dd}| |}| jrD||}|S | jsM|dd}|S )a  Returns the normalized input tensor.

        Arguments
        ---------
        x : torch.Tensor (batch, time, [channels])
            input to normalize. 2d or 3d tensors are expected in input
            4d tensors can be used when combine_dims=True.
        r*   r   r   r)   r   )r&   rE   ndimZreshaper   r   rG   )r   r#   Zshape_orZx_nr   r   r   r$      s    	


zBatchNorm1d_O.forward)NNr?   r@   TTFF)r-   r.   r/   r   r$   r1   r   r   r   r   r>      s    r>   c                       r2   )rF   c                    r3   r4   r5   r6   r   r   r   r      r9   zBatchNorm1d.__init__r:   r   r   r   r   rF      r;   rF   c                       sL   e Zd ZdZdejjdg dg dg dddf fd	d
	ZdddZ  Z	S )XvectoraL  This model extracts X-vectors for speaker recognition and diarization.

    Arguments
    ---------
    device : str
        Device used e.g. "cpu" or "cuda".
    activation : torch class
        A class for constructing the activation layers.
    tdnn_blocks : int
        Number of time-delay neural (TDNN) layers.
    tdnn_channels : list of ints
        Output channels for TDNN layer.
    tdnn_kernel_sizes : list of ints
        List of kernel sizes for each TDNN layer.
    tdnn_dilations : list of ints
        List of dilations for kernels in each TDNN layer.
    lin_neurons : int
        Number of neurons in linear layers.

    Example
    -------
    >>> compute_xvect = Xvector('cpu')
    >>> input_feats = torch.rand([5, 10, 40])
    >>> outputs = compute_xvect(input_feats)
    >>> outputs.shape
    torch.Size([5, 1, 512])
    cpu   )   rM   rM   rM   i  )rL   r*   r*   r   r   )r   r)   r*   r   r   rM   P   c	              	      sd   t    t | _t|D ]!}	||	 }
| jt||
||	 ||	 d| t|
dg ||	 }qd S )N)r   r   r   r   )rH   )	r   r   r   Z
ModuleListblocksrangeextendr   rF   )r   ZdeviceZ
activationZtdnn_blocksZtdnn_channelsZtdnn_kernel_sizesZtdnn_dilationsZlin_neuronsr   Zblock_indexr   r   r   r   r     s    



zXvector.__init__Nc              	   C   sR   | dd}| jD ]}z|||d}W q	 ty    ||}Y q	w | dd}|S )z]Returns the x-vectors.

        Arguments
        ---------
        x : torch.Tensor
        r   r)   )lengths)r   rO   	TypeError)r   r#   Zlenslayerr   r   r   r$     s   
zXvector.forward)N)
r-   r.   r/   __doc__torchr   Z	LeakyReLUr   r$   r1   r   r   r   r   rJ      s    rJ   )numpynprV   Ztorch.nnr   Ztorch.nn.functionalZ
functionalr!   Moduler   r   r0   r'   r>   rF   rJ   r   r   r   r   <module>   s   }<