Search code examples
audio · jupyter-notebook · fft · spectrogram

How to apply a Fourier transform to audio data in a Jupyter notebook?


I want to process MP3 files with the Fourier transform and export the result as a spectrogram. Then I need to save it as a PNG file containing all the frequencies of my MP3. How can I do that in a Jupyter notebook?


Solution

  • Most of the following comes from: http://myinspirationinformation.com/uncategorized/audio-signals-in-python/

    The mp3 sample comes from the BBC bird song site.

    I ran this in Jupyter notebook using Python 3.6 running under Linux Mint.

    from IPython.display import Audio, display
    import matplotlib.pyplot as plt
    from numpy import fft
    import numpy as np
    import pydub
    from scipy.fftpack import fft
    from scipy.io import wavfile
    import scipy
    import urllib
    
    
    # Download an MP3, convert it to WAV and load the samples.
    #
    # Names defined here and used further down:
    #   freq        - sample rate in Hz, as returned by wavfile.read
    #   audio_data  - 2-D sample array, one column per channel
    #   channel1    - first (left) channel
    #   channel2    - second (right) channel; same as channel1 for mono input

    # `import urllib` on its own does not load the `request` submodule, so
    # import it explicitly before calling urllib.request.urlretrieve.
    import urllib.request

    AUDIO_URL='http://downloads.bbc.co.uk/rmhttp/radio4/science/Birdsong-Blackbird.mp3'
    # Working directory for the downloaded/converted files and the final PNG.
    # It must already exist and be writable - adjust it for your machine.
    temp_folder = '/home/bill/data/tmp/'

    mp3_path = temp_folder+'file.mp3'
    wav_path = temp_folder+"file.wav"

    urllib.request.urlretrieve(AUDIO_URL, mp3_path)
    #read mp3 file
    mp3 = pydub.AudioSegment.from_mp3(mp3_path)
    #convert to wav
    mp3.export(wav_path, format="wav")
    #read wav file
    freq, audio_data = wavfile.read(wav_path)

    # wavfile.read returns a 1-D array for mono files; promote it to a single
    # column so the shape[1] lookup and the column indexing below work for
    # both mono and multi-channel input.
    if audio_data.ndim == 1:
        audio_data = audio_data[:, np.newaxis]

    length = audio_data.shape[0]/freq  # duration in seconds
    channels = audio_data.shape[1]
    print('freq: {} length: {} channels: {}'.format(freq, length, channels))

    #if stereo grab both channels; for mono the "right" channel is the only one
    channel1 = audio_data[:,0] #left
    channel2 = audio_data[:,min(1, channels - 1)] #right
    
    # Time axis in seconds: one entry per sample, spaced 1/freq apart.
    time = np.arange(audio_data.shape[0], dtype=float) / freq

    # Waveform view (amplitude, i.e. loudness, against time): one row per
    # channel in a two-row figure, first channel on top, second below.
    plt.figure(1)
    for subplot_position, samples in ((211, channel1), (212, channel2)):
        plt.subplot(subplot_position)
        plt.plot(time, samples, linewidth=0.01, alpha=0.7, color='#ff7f00')
        plt.xlabel('Time (s)')
        plt.ylabel('Amplitude')
    plt.show()
    
    # Frequency-domain view of the first channel, saved as a PNG.
    #
    # NOTE: this is the power spectrum of the whole clip (power against
    # frequency). A time-frequency spectrogram would instead need a
    # short-time transform, e.g. matplotlib's plt.specgram.
    n = len(channel1)

    # The samples are real-valued, so the one-sided transform (0 Hz up to the
    # Nyquist frequency) carries all the information. Going through np.fft
    # also avoids depending on which `fft` name the file's imports left bound.
    fourier = np.fft.rfft(channel1)

    # scale by the number of points so that the magnitude does not depend on the length
    fourier = fourier / float(n)

    #calculate the frequency at each point in Hz
    freq_array = np.fft.rfftfreq(n, d=1.0/freq)

    # The transform is complex: power is the squared magnitude. Taking log10
    # of the complex values directly would plot only the real part of a
    # complex logarithm. Floor the power so silent bins do not give log10(0).
    power = np.abs(fourier) ** 2
    power_db = 10*np.log10(np.maximum(power, np.finfo(float).tiny))

    # Start a fresh figure so the spectrum is never drawn into (or saved
    # together with) the waveform subplots of figure 1.
    plt.figure(2)
    plt.plot(freq_array/1000, power_db, color='#ff7f00', linewidth=0.02)
    plt.xlabel('frequency in kHz')
    plt.ylabel('power in dB')
    plt.savefig(temp_folder+'spectrogram.png')