o
    œU)iC  ã                   @   s~   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ dZddiZeeeeeef ZG d	d
„ d
eƒZdS )é    N)ÚTuple)ÚTensor)ÚDataset)Údownload_url_to_file)Ú_extract_zipzNhttps://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zipZ@f96258be9fdc2cbff6559541aae7ea4f59df3fcaf5cf963aae5ca647357e359cc                	   @   s’   e Zd ZdZddedfdedededefd	d
„Zdefdd„Zde	e
ef fdd„Zdedededefdd„Zdedefdd„Zdefdd„ZdS )ÚVCTK_092a:  *VCTK 0.92* :cite:`yamagishi2019vctk` dataset

    Args:
        root (str): Root directory where the dataset's top level directory is found.
        mic_id (str, optional): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional): The URL to download the dataset from.
            (default: ``"https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"``)
        audio_ext (str, optional): Custom audio extension if dataset is converted to non-default audio format.

    Note:
        * All the speeches from speaker ``p315`` will be skipped due to the lack of the corresponding text files.
        * All the speeches from ``p280`` will be skipped for ``mic_id="mic2"`` due to the lack of the audio files.
        * Some of the speeches from speaker ``p362`` will be skipped due to the lack of  the audio files.
        * See Also: https://datashare.is.ed.ac.uk/handle/10283/3443
    Úmic2Fz.flacÚrootÚmic_idÚdownloadÚurlc              
   C   s‚  |dvrt d|› ƒ‚tj |d¡}tj |d¡| _tj | jd¡| _tj | jd¡| _|| _|| _|rTtj 	| j¡sTtj 
|¡sNt |d ¡}t|||d t|| jƒ tj 	| j¡s_t dƒ‚tt | j¡ƒ| _g | _	 | jD ]O}|d	krz|d
krzqotj | j|¡}	tdd„ t |	¡D ƒƒD ]/}
tj |
¡d }tj | j||› d|› | j› ¡}|dkr´tj 
|¡s´qŽ| j | d¡¡ qŽqod S )N)Zmic1r   z3`mic_id` has to be either "mic1" or "mic2". Found: zVCTK-Corpus-0.92.zipzVCTK-Corpus-0.92ÚtxtZwav48_silence_trimmed)Zhash_prefixz=Dataset not found. Please use `download=True` to download it.Zp280r   c                 s   s    | ]
}|  d ¡r|V  qdS )ú.txtN)Úendswith)Ú.0Úf© r   úd/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/torchaudio/datasets/vctk.pyÚ	<genexpr>U   s   € z$VCTK_092.__init__.<locals>.<genexpr>r   Ú_Zp362)ÚRuntimeErrorÚosÚpathÚjoinÚ_pathÚ_txt_dirÚ
_audio_dirÚ_mic_idÚ
_audio_extÚisdirÚisfileÚ
_CHECKSUMSÚgetr   r   ÚsortedÚlistdirZ_speaker_idsÚ_sample_idsÚsplitextÚappendÚsplit)Úselfr	   r
   r   r   Z	audio_extÚarchiveZchecksumÚ
speaker_idZutterance_dirZutterance_fileÚutterance_idZaudio_path_micr   r   r   Ú__init__&   sF   

ý÷üzVCTK_092.__init__Úreturnc                 C   s8   t |ƒ}| ¡ d W  d   ƒ S 1 sw   Y  d S )Nr   )ÚopenÚ	readlines©r)   Ú	file_pathr   r   r   Ú
_load_text`   s   

$ÿzVCTK_092._load_textc                 C   s
   t  |¡S ©N)Ú
torchaudioÚloadr1   r   r   r   Ú_load_audiod   ó   
zVCTK_092._load_audior+   r,   c           	   
   C   sl   t j | j||› d|› d¡}t j | j||› d|› d|› | j› ¡}|  |¡}|  |¡\}}|||||fS )Nr   r   )r   r   r   r   r   r   r3   r7   )	r)   r+   r,   r
   Ztranscript_pathZ
audio_pathZ
transcriptZwaveformZsample_rater   r   r   Ú_load_sampleg   s   ý
zVCTK_092._load_sampleÚnc                 C   s   | j | \}}|  ||| j¡S )a•  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                Transcript
            str:
                Speaker ID
            std:
                Utterance ID
        )r%   r9   r   )r)   r:   r+   r,   r   r   r   Ú__getitem__w   s   zVCTK_092.__getitem__c                 C   s
   t | jƒS r4   )Úlenr%   )r)   r   r   r   Ú__len__Ž   r8   zVCTK_092.__len__N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__ÚURLÚstrÚboolr-   r3   r   r   Úintr7   Ú
SampleTyper9   r;   r=   r   r   r   r   r      s(    úþýü
û:r   )r   Útypingr   r5   Ztorchr   Ztorch.utils.datar   Ztorchaudio._internalr   Ztorchaudio.datasets.utilsr   rB   r!   rE   rC   rF   r   r   r   r   r   Ú<module>   s    ÿ