I'm trying to read a bytes variable using ffmpeg, but the audio stream I listen to sends me mu-law encoded buffer data like this:
https://github.com/boblp/mulaw_buffer_data/blob/main/buffer_data
I'm having trouble running the ffmpeg_read function from the transformers library found here:
def ffmpeg_read(bpayload: bytes, sampling_rate: int) -> np.ndarray:
    """
    Helper function to read an audio file through ffmpeg.

    The payload is piped to ffmpeg's stdin and no input format is passed on
    the command line, so ffmpeg has to recognise the format from the bytes
    themselves.

    Args:
        bpayload: Complete contents of an audio file.
        sampling_rate: Sample rate, in Hz, requested for the decoded output.

    Returns:
        One-dimensional float32 array holding the decoded mono samples.

    Raises:
        ValueError: If the ffmpeg executable cannot be found, or if ffmpeg
            produced no samples for the payload.
    """
    ar = f"{sampling_rate}"
    ac = "1"
    format_for_conversion = "f32le"
    ffmpeg_command = [
        "ffmpeg",
        "-i",
        "pipe:0",
        "-ac",
        ac,
        "-ar",
        ar,
        "-f",
        format_for_conversion,
        "-hide_banner",
        "-loglevel",
        "quiet",
        "pipe:1",
    ]

    try:
        with subprocess.Popen(ffmpeg_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE) as ffmpeg_process:
            output_stream = ffmpeg_process.communicate(bpayload)
    except FileNotFoundError as error:
        raise ValueError("ffmpeg was not found but is required to load audio files from filename") from error

    # communicate() returns (stdout, stderr); only stdout carries samples.
    out_bytes = output_stream[0]
    audio = np.frombuffer(out_bytes, np.float32)
    # ffmpeg runs with "-loglevel quiet", so an empty output is the only
    # visible sign that decoding failed.
    if audio.shape[0] == 0:
        raise ValueError(
            "Soundfile is either not in the correct format or is malformed. Ensure that the soundfile has "
            "a valid audio file extension (e.g. wav, flac or mp3) and is not corrupted. If reading from a remote "
            "URL, ensure that the URL is the full address to **download** the audio file."
        )
    return audio
But every time I get:
raise ValueError(
"Soundfile is either not in the correct format or is malformed. Ensure that the soundfile has "
"a valid audio file extension (e.g. wav, flac or mp3) and is not corrupted. If reading from a remote "
"URL, ensure that the URL is the full address to **download** the audio file."
)
If I grab any wav file I can do something like this:
import wave
# Read the whole WAV file as raw bytes.
with open('./emma.wav', 'rb') as fd:
    contents = fd.read()
print(contents)
And running it through the function does work!
So my question would be:
How can I transform my mu-law encoded buffer data into a valid bytes format that works with ffmpeg_read()?
EDIT: I've found a way using pywav (https://pypi.org/project/pywav/)
# WavWrite arguments after the filename: 1 = mono channel, 8000 = sample rate,
# 8 = bits per sample, 7 = MULAW encoding.
wave_write = pywav.WavWrite("filename.wav", 1, 8000, 8, 7)
wave_write.write(mu_encoded_data)
wave_write.close()
This is the result: https://github.com/boblp/mulaw_buffer_data/blob/main/filename.wav
the background noise is acceptable.
However, I want to use FFmpeg instead, to avoid creating a temporary file.
This worked for me:
import subprocess
import numpy as np
import io
def ffmpeg_read_mulaw(bpayload: bytes, sampling_rate: int) -> np.ndarray:
    """
    Decode headerless mu-law audio bytes to float32 samples through ffmpeg.

    The input format, sample rate and channel count are passed to ffmpeg
    explicitly, so the payload does not need a container header.

    Args:
        bpayload: Raw mu-law encoded audio bytes.
        sampling_rate: Sample rate, in Hz, of the mu-law input.

    Returns:
        One-dimensional float32 array holding the decoded mono samples.

    Raises:
        ValueError: If the ffmpeg executable cannot be found, or if ffmpeg
            produced no samples for the payload.
    """
    ar = f"{sampling_rate}"
    ac = "1"
    format_for_conversion = "f32le"
    ffmpeg_command = [
        "ffmpeg",
        # Options placed before "-i" describe the input stream.
        "-f",
        "mulaw",
        "-ar",
        ar,
        "-ac",
        ac,
        "-i",
        "pipe:0",
        # Output is raw PCM, whose bitrate is fixed by the sample format,
        # sample rate and channel count, so no "-b:a" option is passed.
        "-f",
        format_for_conversion,
        "-hide_banner",
        "-loglevel",
        "quiet",
        "pipe:1",
    ]

    try:
        with subprocess.Popen(ffmpeg_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE) as ffmpeg_process:
            output_stream = ffmpeg_process.communicate(bpayload)
    except FileNotFoundError as error:
        raise ValueError("ffmpeg was not found but is required to decode mu-law audio") from error

    # communicate() returns (stdout, stderr); only stdout carries samples.
    out_bytes = output_stream[0]
    audio = np.frombuffer(out_bytes, np.float32)
    if audio.shape[0] == 0:
        raise ValueError("Failed to decode mu-law encoded data with FFMPEG.")
    return audio
# Usage example.
# Replace this placeholder payload with your own mu-law encoded bytes.
mu_encoded_data = b"\x7F\xFF\x80\x01\x7F\xFF"
sampling_rate = 8000
decoded_audio = ffmpeg_read_mulaw(bpayload=mu_encoded_data, sampling_rate=sampling_rate)
print(decoded_audio)