Search code examples
python · node.js · sockets · audio · wav

Format and send seamless audio from JS to local PyAudio


I'm using Unix sockets to stream audio from my microphone (via an Electron NodeJS app) to a python program listening on the socket and sending the audio to PyAudio for playback.

Electron app getUserMedia() => WAV formatting => NodeJS socket => Unix Socket => Python Socket => PyAudio

I have it working, but there's a constant clicking sound when each chunk starts or ends. Where should I start debugging it? Here's the code:

NodeJS app (sender):

var net = require('net');
const nodeWav = require("node-wav");

// ---- Capture state, populated in createRecordingTask() ----
var recorder = null;     // ScriptProcessorNode delivering PCM frames
var volume = null;       // GainNode between the mic source and the recorder
var audioInput = null;   // MediaStreamAudioSourceNode wrapping the microphone
var sampleRate = null;   // context.sampleRate, written into each WAV header
var audioContext = null; // AudioContext constructor (vendor-prefix resolved)
var context = null;      // the live AudioContext instance
var outputElement = document.getElementById('output'); // status element; not used in this file
var outputString;        // NOTE(review): declared but never assigned or read here
var bufferSize = 1024;   // frames per ScriptProcessor callback (also chunk size on the wire)

var mediaSourceIn;       // MediaStream handed to us by getUserMedia()

// callback for navigator.mediaDevices.getUserMedia()
function audioReceiver(e) {
    // creates Socket
    mediaSourceIn = e;
    initSocket();
}

var audioSocket;
/**
 * Connects to the Unix domain socket the Python receiver listens on.
 *
 * FIX: net.connect() returns a net.Socket, not a Promise, so the original
 * `.catch(...)` chain threw "audioSocket.catch is not a function" at runtime
 * (and its undefined return value clobbered `audioSocket`).  Connection
 * failures are delivered through the socket's 'error' event instead.
 */
function initSocket() {
  audioSocket = net.connect('/tmp/audio_input', connected);
  audioSocket.on('error', function (err) {
    console.log("Could not connect...");
    console.log(err);
  });
}

// net.Socket 'connect' listener: inside the listener, `this` is the
// now-connected socket.  Publish it for upload() and start capturing audio.
function connected() {
  console.log("CONNECTED TO UNIX SOCKET!");
  audioSocket = this;
  createRecordingTask();
}

// Builds the Web Audio capture graph:
//   mic MediaStream -> MediaStreamSource -> GainNode -> ScriptProcessor
// and, for every processed buffer, packages the samples and sends them to
// the socket via upload().
// NOTE(review): ScriptProcessorNode is deprecated (AudioWorklet is the
// replacement); its main-thread callback can glitch under load, which is one
// candidate cause of audible clicks between chunks — worth confirming.
function createRecordingTask() {
  // creates the audio context
    audioContext = window.AudioContext || window.webkitAudioContext;
    context = new audioContext();

    // retrieve the current sample rate to be used for WAV packaging
    sampleRate = context.sampleRate;

    // creates a gain node
    volume = context.createGain();

    // creates an audio node from the microphone incoming stream
    audioInput = context.createMediaStreamSource(mediaSourceIn);

    // connect the stream to the gain node
    audioInput.connect(volume);

    /* From the spec: This value controls how frequently the audioprocess event is
    dispatched and how many sample-frames need to be processed each call.
    Lower values for buffer size will result in a lower (better) latency.
    Higher values will be necessary to avoid audio breakup and glitches */
    recorder = context.createScriptProcessor(bufferSize, 2, 2);

    recorder.onaudioprocess = function(e){
        console.log ('recording');
        // NOTE(review): getChannelData(1) assumes the input is stereo; a mono
        // microphone stream would break this — confirm channel count upstream.
        var left = e.inputBuffer.getChannelData (0);
        var right = e.inputBuffer.getChannelData (1);
        // presumably copied into fresh Float32Arrays because the engine may
        // reuse the underlying channel buffers after this callback returns
        var bf = createAudioBuffer(
          new Float32Array (left),
          new Float32Array (right));

        upload(bf);
    }

    // we connect the recorder
    volume.connect (recorder);
    recorder.connect (context.destination);
}

/**
 * Copies a channel's samples into a fresh Float32Array of `length` frames,
 * zero-padding on the right when the input is shorter.
 *
 * FIX: callers pass (channel, bufferSize) — see createAudioBuffer — but the
 * original signature ignored the second argument and silently used the
 * module-level `bufferSize` global.  The length is now an explicit parameter
 * defaulting to that global, so existing calls behave identically.
 *
 * @param {Float32Array} channelBuffer - source samples
 * @param {number} [length=bufferSize] - number of frames in the copy
 * @returns {Float32Array} a new array; the input is not retained
 */
function mergeBuffers(channelBuffer, length = bufferSize) {
  const result = new Float32Array(length);
  result.set(channelBuffer);
  return result;
}

/**
 * Interleaves two equal-length channel arrays into a single L/R/L/R array.
 * The result has leftChannel.length + rightChannel.length entries.
 */
function interleave(leftChannel, rightChannel) {
  const total = leftChannel.length + rightChannel.length;
  const out = new Float32Array(total);

  for (let frame = 0, write = 0; write < total; frame += 1) {
    out[write++] = leftChannel[frame];
    out[write++] = rightChannel[frame];
  }

  return out;
}

/**
 * Writes an ASCII string into a DataView one byte per character,
 * starting at `offset`.
 */
function writeUTFBytes(view, offset, string) {
  string.split('').forEach(function (ch, i) {
    view.setUint8(offset + i, ch.charCodeAt(0));
  });
}

/**
 * Converts one ScriptProcessor buffer of stereo float samples into a Node
 * Buffer of interleaved 16-bit little-endian PCM, scaled by a fixed 0.6 gain.
 *
 * FIX (the clicking): the original prepended a full 44-byte RIFF/WAVE header
 * to EVERY 1024-frame chunk.  The Python receiver plays the raw byte stream,
 * so each header was rendered as a burst of garbage samples — one click per
 * chunk boundary.  A WAV header describes a whole file; for a continuous
 * stream only the PCM payload should be sent.  (If the receiver keys off the
 * old 44-byte offset, drop that too — raw PCM needs no framing here.)
 *
 * @param {Float32Array} leftchannel - left-channel samples in [-1, 1]
 * @param {Float32Array} rightchannel - right-channel samples in [-1, 1]
 * @returns {Buffer} frames * 4 bytes of interleaved s16le PCM
 */
function createAudioBuffer(leftchannel, rightchannel) {
  const frames = Math.min(leftchannel.length, rightchannel.length);
  const volume = 0.6; // fixed output gain, carried over from the original

  // 2 channels * 2 bytes per sample
  const buffer = new ArrayBuffer(frames * 4);
  const view = new DataView(buffer);

  let offset = 0;
  for (let i = 0; i < frames; i++) {
    view.setInt16(offset, leftchannel[i] * (0x7FFF * volume), true);
    view.setInt16(offset + 2, rightchannel[i] * (0x7FFF * volume), true);
    offset += 4;
  }

  return Buffer.from(buffer);
}


/**
 * Pushes one encoded audio chunk to the receiver.  If the socket is no
 * longer writable the chunk is dropped and a log line is emitted.
 */
function upload(thatAudio) {
  if (!audioSocket.writable) {
    console.log("DISCONNECTED!");
    return;
  }
  audioSocket.write(thatAudio);
}

Python program (receiver):

import socket
import os
import pyaudio
from threading import Thread  # NOTE(review): unused in this script

# Unix-domain socket path the Node sender connects to.
sockfile = "/tmp/audio_input"

FORMAT = pyaudio.paInt16
CHUNK = 1024       # frames per sender chunk (matches the JS bufferSize)
CHANNELS = 2
RATE = 44100
frames = []        # NOTE(review): unused

# Remove a stale socket file left behind by a previous run.
if os.path.exists(sockfile):
    os.remove(sockfile)

print("Opening socket...")
server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
server.bind(sockfile)
server.listen(5)
conn, addr = server.accept()

print("Creating PyAudio stream...")
p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                output=True,
                frames_per_buffer=CHUNK,
                )

# FIX: the original used Python-2 print statements here while the rest of the
# script already used print() calls — a SyntaxError under Python 3.
print("Listening...")
# Each sender chunk is a 44-byte WAV header plus CHUNK stereo 16-bit frames.
# NOTE(review): those header bytes are played back as audio and are the likely
# source of the per-chunk click — consider sending/consuming raw PCM instead.
singleChunkSizeBytes = 44 + (CHUNK * CHANNELS * 2)
print(singleChunkSizeBytes, "bytes at a time")

try:
    while True:
        soundData = conn.recv(singleChunkSizeBytes)
        if not soundData:
            # recv() returning b'' means the peer closed the connection; the
            # original spun forever here and never reached its cleanup code.
            break
        # FIX: let PyAudio derive num_frames from the data; the original
        # forced num_frames=CHUNK even when recv() returned fewer bytes,
        # making write() read past the end of the buffer.
        stream.write(soundData)
finally:
    # Guarantee resource release even if write()/recv() raises.
    stream.stop_stream()
    stream.close()
    p.terminate()
    conn.close()
    server.close()
    os.remove(sockfile)

Solution

  • First you should check if stream.write() causes buffer underruns. This can probably be done with the exception_on_underflow option (see the docs). If you want a non-throwing version of the write() function, you can try the sounddevice module (see its write() docs).

    If there are underruns, that may mean that the socket doesn't provide the data fast enough. In this case, you should probably implement some buffering on the receiver side, e.g. using queue.Queue.

    If there are no underruns, the error is probably on the sending side ...