o
    #1 i$/                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlZd dlZd dlmZmZmZmZ erFd dlmZ eeZe rad dlZe Zedd	d
 eD ZnedddiZe rod dlZdd ej  D Z!G dd dZ"dee#ej$f dee#ej$f fddZ%dej$dej$fddZ&dej$dej$dej$fddZ'ede(de(de#de#de(dej$fd d!Z)dS )"    N)Enum)cache)Path)TYPE_CHECKINGUnion)assert_soundfile_installedassert_soxr_installedis_soundfile_installedis_soxr_installed)RawAudioAudioFormatc                 C   s   i | ]}||qS  r   ).0format_namer   r   `/home/app/PaddleOCR-VL-test/.venv_paddleocr/lib/python3.10/site-packages/mistral_common/audio.py
<dictcomp>    s    r   nonec                 C   s   g | ]}|j  qS r   )valuelower)r   vr   r   r   
<listcomp>'   s    r   c                   @   s   e Zd ZdejdededdfddZdefdd	Zd'd
dZ	e
defddZed(dededd fddZed(dededd fddZed(dededd fddZed(dededd fddZd)dededefddZed*d"d#Zd$eddfd%d&ZdS )+Audioaudio_arraysampling_rateformatreturnNc                 C   s   || _ || _|| _|   dS )a  Initialize an Audio instance with audio data, sampling rate, and format.

        Args:
            audio_array: The audio data as a numpy array.
            sampling_rate: The sampling rate of the audio in Hz.
            format: The format of the audio file.
        N)r   r   r   _check_valid)selfr   r   r   r   r   r   __init__+   s   zAudio.__init__c                 C   s*   d| j  dt| j| j  dd| jj S )NzAudio - sampling_rate=z Hz, duration=z.2fz	s, shape=)r   lenr   shaper   r   r   r   __repr__8   s   
zAudio.__repr__c                 C   sd   t | jtjsJ ttj| jjdksJ d| jjt  | jtv s0J d| jdtd S )N   zself.audio_array.ndim=zself.format= not in EXPECTED_FORMAT_VALUES=)	
isinstancer   npndarraytypendimr   r   EXPECTED_FORMAT_VALUESr!   r   r   r   r   ?   s   $zAudio._check_validc                 C   s   | j jd | j }|S )z{Calculate the duration of the audio in seconds.

        Returns:
           The duration of the audio in seconds.
        r   )r   r    r   )r   durationr   r   r   r+   E   s   zAudio.durationTurlstrictc              
   C   sx   zt | }|  tj|j|dW S  t jy' } ztd|  |d}~w ty; } z	td|  d|d}~ww )zCreate an Audio instance from a URL.

        Args:
            url: The URL of the audio file.
            strict: Whether to strictly enforce mono audio.

        Returns:
            An instance of the Audio class.
        r-   z#Failed to download audio from URL: Nz*Failed to create Audio instance from URL: z .)	requestsgetraise_for_statusr   
from_bytescontentRequestException
ValueError	Exception)r,   r-   responseer   r   r   from_urlP   s   
zAudio.from_urlaudio_base64c              
   C   s`   t   td| r| dd } zt| }W n ty( } ztd|d}~ww tj	||dS )a  Create an Audio instance from a base64 encoded string.

        Args:
            audio_base64: The base64 encoded audio data.
            strict: Whether to strictly enforce mono audio. Defaults to True.

        Returns:
            An instance of the Audio class.
        z^data:audio/\w+;base64,,r#   zHbase64 decoding failed. Please check the input string is a valid base64.Nr.   )
r   rematchsplitbase64	b64decoder6   r5   r   r2   )r:   r-   audio_bytesr8   r   r   r   from_base64d   s   
zAudio.from_base64filec                 C   s   t   t| tr| dr| dd } t|  s!td| dt| d}| }W d   n1 s5w   Y  t	j
||dS )zCreate an Audio instance from an audio file.

        Args:
            file: Path to the audio file.
            strict: Whether to strictly enforce mono audio. Defaults to True.

        Returns:
            An instance of the Audio class.
        zfile://   Nzfile=z does not existrbr.   )r   r%   str
startswithr   existsFileNotFoundErroropenreadr   r2   )rC   r-   frA   r   r   r   	from_file{   s   
zAudio.from_filerA   c           	   	   C   s   t | )}t|}|jdd}|j}|j}W d   n1 s"w   Y  W d   n1 s1w   Y  t|}|j	 }|j
dkrT|rNtd|j
|jdd}t|||dS )zCreate an Audio instance from bytes.

        Args:
            audio_bytes: The audio data as bytes.
            strict: Whether to strictly enforce mono audio. Defaults to True.

        Returns:
            An instance of the Audio class.
        Zfloat32)ZdtypeNr#   zaudio_array.ndim=Zaxis)r   r   r   )ioBytesIOsfZ	SoundFilerK   Z
samplerater   r   r   r   r)   r5   meanr   )	rA   r-   
audio_filerL   r   r   Zaudio_formatZformat_enumr   r   r   r   r2      s   

zAudio.from_bytesFprefixc                 C   s   t   |tv sJ d|dtt $}tj|| j| j| d |	d t
| d}W d   n1 s<w   Y  |rMd|  d| }|S )	a  Convert the audio data to a base64 encoded string.

        Args:
            format: The format to encode the audio in.
            prefix: Whether to add a data prefix to the base64 encoded string.

        Returns:
            The base64 encoded audio data.
        zformat=r$   )r   r   zutf-8Nzdata:audio/z;base64,)r   r*   rO   rP   rQ   writer   r   upperseekr?   	b64encoderK   decoder   )r   r   rT   rS   Z
base64_strr   r   r   	to_base64   s   


zAudio.to_base64audior   c                 C   sD   t | jtrt| jS t | jtrt| jS tdt| j )zCreate an Audio instance from a RawAudio object.

        Args:
            audio: The RawAudio object containing audio data.

        Returns:
            An instance of the Audio class.
        zUnsupported audio data type: )	r%   databytesr   r2   rF   rB   r5   r(   )r[   r   r   r   from_raw_audio   s
   
zAudio.from_raw_audionew_sampling_ratec                 C   s6   | j |krdS t  tj| j| j |dd| _|| _ dS )zResample audio data to a new sampling rate.

        Args:
            new_sampling_rate: The new sampling rate to resample the audio to.
        NZHQ)Zquality)r   r   soxrresampler   )r   r_   r   r   r   ra      s
   

zAudio.resample)r   N)T)F)r[   r   r   r   )__name__
__module____qualname__r&   r'   intrF   r   r"   r   propertyfloatr+   staticmethodboolr9   rB   rM   r]   r2   rZ   r^   ra   r   r   r   r   r   *   s$    

r   freqr   c                 C   s   d}d}dt d }d|  d }t| t jr8t|t js#J t|| |k}|t | | | |  ||< |S | |krG|t | | |  }|S )zConvert frequency from hertz to mels using the "slaney" mel-scale.

    Args:
        freq: The frequency, or multiple frequencies, in hertz (Hz).

    Returns:
        The frequencies on the mel scale.
         @@      .@      ;@皙@      @      i@)r&   logr%   r'   r(   )rj   min_log_hertzmin_log_mellogstepmels
log_regionr   r   r   hertz_to_mel   s   	rw   ru   c                 C   sL   d}d}t dd }d|  d }| |k}|t || | |   ||< |S )zConvert frequency from mels to hertz using the "slaney" mel-scale.

    Args:
        mels: The frequency, or multiple frequencies, in mels.

    Returns:
        The frequencies in hertz.
    rk   rl   rn   rm   rp   ro   )r&   rq   exp)ru   rr   rs   rt   rj   rv   r   r   r   mel_to_hertz   s   	ry   	fft_freqsfilter_freqsc                 C   s   t |}t |dt | d }|ddddf  |dd  }|ddddf |dd  }t t dt ||}|S )a@  Creates a triangular filter bank.

    Adapted from *torchaudio* and *librosa*.

    Args:
        fft_freqs: Discrete frequencies of the FFT bins in Hz.
        filter_freqs: Center frequencies of the triangular filters to create, in Hz.

    Returns:
        array of shape `(num_frequency_bins, num_mel_filters)`
    r   r#   N   )r&   diffexpand_dimsmaximumZzerosminimum)rz   r{   Zfilter_diffZslopesZdown_slopesZ	up_slopesZfilter_bankr   r   r   _create_triangular_filter_bank  s   
" r   num_frequency_binsnum_mel_binsmin_frequencymax_frequencyr   c                 C   s   | dk rt d|  d||krt d| d| t|}t|}t|||d }t|}td|d | }	t|	|}
d|d|d  |d|   }|
t|d9 }
|
jdd	d
k rjt d| d|  d|
S )aV  Create a Mel filter bank matrix for converting frequency bins to the Mel scale.

    This function generates a filter bank matrix that can be used to transform a
    spectrum represented in frequency bins to the Mel scale. The Mel scale is a
    perceptual scale of pitches judged by listeners to be equal in distance from one another.

    Args:
        num_frequency_bins: The number of frequency bins in the input spectrum.
        num_mel_bins: The number of desired Mel bins in the output.
        min_frequency: The minimum frequency (in Hz) to consider.
        max_frequency: The maximum frequency (in Hz) to consider.
        sampling_rate: The sampling rate of the audio signal.

    Returns:
        A filter bank matrix of shape (num_mel_bins, num_frequency_bins)
        that can be used to project frequency bin energies onto Mel bins.
    r~   zRequire num_frequency_bins: z >= 2zRequire min_frequency: z <= max_frequency: r   g       @NrN   g        zNAt least one mel filter has all zero values. The value for `num_mel_filters` (z?) may be set too high. Or, the value for `num_frequency_bins` (z) may be set too low.)	r5   rw   r&   Zlinspacery   r   r   maxany)r   r   r   r   r   Zmel_minZmel_maxZ	mel_freqsr{   rz   Zmel_filtersZenormr   r   r   mel_filter_bank&  s*   
 r   )*r?   rO   loggingr<   enumr   	functoolsr   pathlibr   typingr   r   numpyr&   r/   Zmistral_common.importsr   r   r	   r
   Z&mistral_common.protocol.instruct.chunkr   	getLoggerrb   loggerZ	soundfilerQ   Zavailable_formatsr   r`   __members__valuesr*   r   rg   r'   rw   ry   r   re   r   r   r   r   r   <module>   sR    
 &>