I'm using Unix sockets to stream audio from my microphone (via an Electron NodeJS app) to a python program listening on the socket and sending the audio to PyAudio for playback.
Electron app getUserMedia()
=> WAV formatting => NodeJS socket => Unix Socket => Python Socket => PyAudio
I have it working, but there's a constant clicking sound when each chunk starts or ends. Where should I start debugging it? Here's the code:
NodeJS app (sender):
var net = require('net');
const nodeWav = require("node-wav"); // NOTE(review): appears unused in this file — confirm
// Web Audio graph nodes and settings, populated by createRecordingTask().
var recorder = null;     // ScriptProcessorNode delivering PCM frames
var volume = null;       // GainNode between the mic source and the recorder
var audioInput = null;   // MediaStreamAudioSourceNode wrapping the mic stream
var sampleRate = null;   // device sample rate, written into the WAV header
var audioContext = null; // AudioContext constructor (vendor-prefix resolved)
var context = null;      // live AudioContext instance
var outputElement = document.getElementById('output'); // NOTE(review): not referenced below — confirm it is used elsewhere
var outputString;
// Frames per ScriptProcessor callback: lower = less latency, higher = fewer glitches.
var bufferSize = 1024;
var mediaSourceIn;       // MediaStream handed to audioReceiver() by getUserMedia()
/**
 * Success callback for navigator.mediaDevices.getUserMedia().
 * Stashes the captured MediaStream and opens the Unix socket to the
 * Python receiver; recording starts once the socket connects.
 * @param {MediaStream} stream - live microphone stream
 */
function audioReceiver(stream) {
  mediaSourceIn = stream;
  initSocket();
}
// Module-level handle to the Unix-domain socket; assigned on connect.
var audioSocket;

/**
 * Connect to the Python receiver listening on /tmp/audio_input.
 * FIX: net.connect() returns a net.Socket, not a Promise, so the original
 * `.catch(...)` chained onto it threw a TypeError at runtime. Connection
 * failures on a net.Socket are reported via the 'error' event instead.
 */
function initSocket() {
  audioSocket = net.connect('/tmp/audio_input', connected);
  audioSocket.on('error', function (err) {
    console.log("Could not connect...");
    console.log(err);
  });
}

/**
 * 'connect' listener: `this` is the now-connected socket. Keep a
 * module-level reference for upload(), then start capturing audio.
 */
function connected() {
  console.log("CONNECTED TO UNIX SOCKET!");
  audioSocket = this;
  createRecordingTask();
}
/**
 * Build the Web Audio capture graph: microphone -> gain -> ScriptProcessor.
 * Each ScriptProcessor callback packages bufferSize stereo frames as a WAV
 * chunk and writes it to the Unix socket.
 */
function createRecordingTask() {
  // Resolve the (possibly vendor-prefixed) constructor, then instantiate.
  audioContext = window.AudioContext || window.webkitAudioContext;
  context = new audioContext();
  // Remember the device sample rate for the WAV packaging step.
  sampleRate = context.sampleRate;
  volume = context.createGain();
  // Wrap the incoming microphone stream as an audio node and route it
  // through the gain node.
  audioInput = context.createMediaStreamSource(mediaSourceIn);
  audioInput.connect(volume);
  // Per the spec, bufferSize controls how often onaudioprocess fires and
  // how many sample-frames arrive per call: smaller = lower latency,
  // larger = fewer glitches. (ScriptProcessorNode is deprecated in favor
  // of AudioWorklet, but still functional.)
  recorder = context.createScriptProcessor(bufferSize, 2, 2);
  recorder.onaudioprocess = function (event) {
    console.log ('recording');
    // Copy both channels out of the (reused) input buffer before encoding.
    var leftSamples = new Float32Array(event.inputBuffer.getChannelData (0));
    var rightSamples = new Float32Array(event.inputBuffer.getChannelData (1));
    upload(createAudioBuffer(leftSamples, rightSamples));
  };
  volume.connect (recorder);
  // The processor must be wired to a destination or onaudioprocess may
  // never fire in some browsers.
  recorder.connect (context.destination);
}
/**
 * Copy `channelBuffer` into a fresh Float32Array of `length` samples,
 * zero-padded if the input is shorter.
 * FIX: the original silently ignored its second argument and always
 * allocated `bufferSize` samples, even though both call sites pass the
 * desired length explicitly. Honoring it keeps the function correct if
 * bufferSize ever changes, and defaulting to the input's own length keeps
 * single-argument calls working.
 * @param {Float32Array} channelBuffer - samples to copy
 * @param {number} [length=channelBuffer.length] - output size in samples
 * @returns {Float32Array} a new, independent copy
 */
function mergeBuffers(channelBuffer, length) {
  if (length === undefined) length = channelBuffer.length;
  var result = new Float32Array(length);
  // subarray() bounds the copy so an over-long input cannot throw;
  // set() copies, so later writes to the source cannot mutate this frame.
  result.set(channelBuffer.subarray(0, length));
  return result;
}
/**
 * Interleave two equal-length channel buffers into a single
 * L,R,L,R,... sample array for 16-bit stereo PCM packing.
 * @param {Float32Array} leftChannel
 * @param {Float32Array} rightChannel
 * @returns {Float32Array} interleaved samples (sum of both lengths)
 */
function interleave(leftChannel, rightChannel) {
  var total = leftChannel.length + rightChannel.length;
  var result = new Float32Array(total);
  // One input frame produces two consecutive output samples.
  for (var frame = 0, out = 0; out < total; frame++) {
    result[out++] = leftChannel[frame];
    result[out++] = rightChannel[frame];
  }
  return result;
}
/**
 * Write an ASCII string into a DataView one byte per character,
 * starting at `offset` (used for the 'RIFF'/'WAVE'/'fmt '/'data' tags).
 * @param {DataView} view - destination view
 * @param {number} offset - byte offset to start writing at
 * @param {string} string - ASCII text to write
 */
function writeUTFBytes(view, offset, string) {
  for (var i = 0, n = string.length; i < n; i++) {
    view.setUint8(offset + i, string.charCodeAt(i));
  }
}
/**
 * Encode one capture callback's worth of stereo samples as raw
 * little-endian 16-bit PCM, ready to stream over the socket.
 *
 * FIX (the clicking): the original prepended a full 44-byte RIFF/WAVE
 * header to EVERY bufferSize-frame chunk. The Python receiver feeds the
 * socket bytes straight to PyAudio as raw PCM, so each header was played
 * back as audio — an audible click at every chunk boundary. A streaming
 * pipe needs headerless PCM (a WAV header belongs only at the start of a
 * complete file).
 *
 * Also clamps samples to [-1, 1] before conversion so loud input cannot
 * wrap around in setInt16.
 *
 * @param {Float32Array} leftchannel - left-channel samples in [-1, 1]
 * @param {Float32Array} rightchannel - right-channel samples in [-1, 1]
 * @returns {Buffer} interleaved s16le PCM (no header)
 */
function createAudioBuffer(leftchannel, rightchannel) {
  // Normalize each channel to exactly bufferSize samples.
  var leftBuffer = mergeBuffers(leftchannel, bufferSize);
  var rightBuffer = mergeBuffers(rightchannel, bufferSize);
  // Interleave into L,R,L,R,... frame order.
  var interleaved = interleave(leftBuffer, rightBuffer);
  // 2 bytes per sample, no header.
  var buffer = new ArrayBuffer(interleaved.length * 2);
  var view = new DataView(buffer);
  var index = 0;
  var volume = 0.6; // constant attenuation applied at encode time
  for (var i = 0; i < interleaved.length; i++) {
    // Clamp, then scale float [-1, 1] to signed 16-bit.
    var sample = Math.max(-1, Math.min(1, interleaved[i]));
    view.setInt16(index, sample * (0x7FFF * volume), true);
    index += 2;
  }
  return Buffer.from(view.buffer);
}
/**
 * Send one encoded PCM chunk over the Unix socket, if still connected.
 * @param {Buffer} thatAudio - encoded chunk to write
 */
function upload(thatAudio) {
  if (!audioSocket.writable) {
    console.log("DISCONNECTED!");
    return;
  }
  audioSocket.write(thatAudio);
}
Python program (receiver):
# Receiver: accepts raw audio bytes from the Node sender over a Unix
# socket and plays them through PyAudio.  Targets Python 3 (the original
# mixed Python 2 print statements with Python 3 calls).
import socket
import os
import pyaudio
from threading import Thread  # NOTE(review): unused here — confirm before removing

sockfile = "/tmp/audio_input"
FORMAT = pyaudio.paInt16   # 16-bit signed samples, matching the sender
CHUNK = 1024               # frames per sender chunk
CHANNELS = 2
RATE = 44100
frames = []

# Remove a stale socket file left by a previous run.
if os.path.exists(sockfile):
    os.remove(sockfile)

print("Opening socket...")
server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
server.bind(sockfile)
server.listen(5)
conn, addr = server.accept()

print("Creating PyAudio stream...")
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                frames_per_buffer=CHUNK,
                )

# Read size matches one sender chunk (44-byte WAV header + PCM data).
# SOCK_STREAM does not preserve message boundaries, so this is only a
# hint; recv() may return fewer bytes.
singleChunkSizeBytes = 44 + (CHUNK * CHANNELS * 2)
print("Listening...")
print(singleChunkSizeBytes, "bytes at a time")

try:
    while True:
        soundData = conn.recv(singleChunkSizeBytes)
        if not soundData:
            # FIX: an empty recv() means the peer closed the connection;
            # the original spun forever in a busy loop at this point.
            break
        # FIX: recv() can return fewer bytes than requested, so the
        # original hard-coded frame count (CHUNK) was wrong for short
        # reads.  Let PyAudio derive the frame count from the buffer.
        stream.write(soundData)
finally:
    # FIX: the original cleanup lines sat after `while True` and were
    # unreachable; release audio and socket resources here instead.
    stream.stop_stream()
    stream.close()
    p.terminate()
    conn.close()
    server.close()
    if os.path.exists(sockfile):
        os.remove(sockfile)
First, you should check whether stream.write()
causes buffer underruns. You can detect this with the exception_on_underflow
option (see the docs).
If you want a non-throwing version of the write()
function, you can try the sounddevice module (see its write()
docs).
If there are underruns, that may mean that the socket doesn't provide the data fast enough. In this case, you should probably implement some buffering on the receiver side, e.g. using queue.Queue.
If there are no underruns, the error is probably on the sending side — for example, writing a 44-byte WAV header in front of every chunk means those header bytes are played back as audio samples, which is heard as a periodic click at each chunk boundary.