Search code examples
python matplotlib audio pydub

Synchronizing audio and animation in python


I've written code to load an audio file, calculate the frequency spectrum, and animate it. I don't appear to have a way to synchronize the audio with the animation with the tools I'm using.

The roadblock I'm hitting is twofold: pydub doesn't actually tell me where in the audio I am (though I can time that myself), and matplotlib gives me neither control over where I am in the animation nor a guaranteed frame rate.

Is there a technique or a combination of tools that I'm missing that would allow me to solve this particular problem?

The code is below:

from pydub import AudioSegment
from pydub.playback import play
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from scipy import signal
import numpy as np
import threading
import time
from datetime import timedelta

# Decode the MP3 and pull out the left channel's raw samples for analysis.
sound = AudioSegment.from_mp3("A Day Without Rain - Enya - Flora's Secret.mp3")
song_length = sound.duration_seconds
sampling_rate = sound.frame_rate
mono_channels = sound.split_to_mono()
left = mono_channels[0]
x = left.get_array_of_samples()

# Short-time Fourier transform of the samples.
# f: frequency bins, t: segment times, Zxx: complex STFT (frequency x time).
f, t, Zxx = signal.stft(
    x,
    fs=sampling_rate,
    nperseg=8820,
    noverlap=5292,
)
# Magnitudes, transposed so that y[k] is the spectrum of segment k.
y = np.abs(Zxx).T

# Playback runs on its own thread; it is created here but not started yet.
music_thread = threading.Thread(target=play, args=(sound,))

# Build the figure
fig = plt.figure(figsize=(14, 6))
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer accept the old spelling, so apply whichever name
# this installation provides (and skip styling if neither is present).
for _style_name in ("seaborn-v0_8-bright", "seaborn-bright"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
# Fixed axis limits; the x range is presumably in Hz (the STFT frequency
# axis) -- TODO confirm the intended units/range.
ax = plt.axes(xlim=[0, 4000], ylim=[0, 3000])
# Empty line artist; the animation callbacks fill in its data.
line1, = ax.plot([], [])


# Animation initializer handed to FuncAnimation as init_func
def init():
    """Clear the spectrum line and create the two timing labels.

    Stores the created annotations in the module-level names
    ``annotation1`` and ``annotation2`` so the frame callback can update
    their text.

    Returns:
        A one-element tuple containing the line artist.
    """
    global annotation1, annotation2
    line1.set_data([], [])

    # Both labels are positioned in figure-fraction coordinates.
    coords = 'figure fraction'
    music_label = "Music: {}".format("")
    animation_label = "Animation: {}".format("")
    annotation1 = plt.annotate(music_label, xy=(0.2, 0.8), xycoords=coords)
    annotation2 = plt.annotate(animation_label, xy=(0.6, 0.8), xycoords=coords)

    return (line1,)


# Function for the animation
def animate(i):
    """Draw the spectrum frame that matches the current audio position.

    FuncAnimation does not guarantee that frames are delivered at the
    requested interval, so a frame counter drifts away from the audio.
    The frame index is therefore derived from the wall-clock time elapsed
    since playback started rather than from ``i``. This is meant for live
    playback; it is not suitable for rendering frames offline.

    Args:
        i: Frame counter supplied by FuncAnimation. It is not used to pick
            the frame.

    Returns:
        A one-element tuple containing the updated line artist.
    """
    global music_start, annotation1, annotation2
    # Thread.ident stays None until start() has been called, so this starts
    # playback exactly once even if the callback is invoked with i == 0 again.
    if music_thread.ident is None:
        music_thread.start()
        music_start = time.perf_counter()

    elapsed = time.perf_counter() - music_start
    n_frames = int(t.size)
    # Map elapsed playback time onto an STFT segment index, clamped so that
    # running past the end of the song keeps showing the last segment
    # instead of raising IndexError.
    frame = round(elapsed / song_length * n_frames)
    frame = max(0, min(frame, n_frames - 1))

    line1.set_data(f, y[frame])
    annotation1.set_text("Music: {}".format(timedelta(seconds=elapsed)))
    annotation2.set_text("Animation: {}".format(timedelta(seconds=frame / n_frames * song_length)))
    return line1,


# Create the animation (init() for setup, animate() per frame, interval=55)
# and open the figure window.
anim = FuncAnimation(
    fig,
    animate,
    init_func=init,
    interval=55,
)
plt.show()

Solution

  • Well, I did figure out one way to fix my problem.

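    It turns out to be easiest to ignore the frame counter that matplotlib passes in and instead compute the frame index from the elapsed playback time inside the animate function, once music_start has been set and before setting the line data: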

    # Clamp so that playback reaching or passing the end of the song cannot index past the last STFT segment.
    i = min(round((time.perf_counter() - music_start) / song_length * t.size), t.size - 1)