javascript · web-audio-api · getusermedia

Stream realtime audio over socket.io


How do I stream realtime audio from one client to possibly multiple clients with socket.io?

I got to the point where I can record audio and play it back in the same tab.

That's my current code for this:

$(document).ready(function () {
    var socket = io("ws://127.0.0.1:4385");

    if (navigator.mediaDevices) {
        console.log('getUserMedia supported.');

        var constraints = { audio: true };

        navigator.mediaDevices.getUserMedia(constraints)
            .then(function (stream) {

                let ctx = new AudioContext();
                let source = ctx.createMediaStreamSource(stream);
                let destination = ctx.createMediaStreamDestination(); // not used yet
                source.connect(ctx.destination); // play the microphone back locally
            })
            .catch(function (err) {
                console.log('The following error occurred: ' + err);
            });
    }
});

How do I send that audio stream to my socket.io server and then back to another client?

I have heard about WebRTC, but I don't want a peer-to-peer solution, as that would put load on the sending client if there are multiple clients who want to listen to the audio.

Somehow there has to be a way to retrieve the raw audio data, send it to my socket.io server, and have the server send it back to the clients who want to listen to it.


Solution

  • After a bunch of trial and error I got to a solution I'm satisfied with. Here is the client-side JavaScript. The server-side socket.io code just forwards the data to the correct clients and should be trivial; a minimal sketch of that part follows the client code below.

    There is some frontend stuff in there as well. Just ignore it.

    main.js

    var socket;
    var ctx;
    var playbackBuffers = {};
    var audioWorkletNodes = {};
    var isMuted = true;
    
    $(document).ready(function () {
    
        $('#login-form').on('submit', function (e) {
            e.preventDefault();
            $('#login-view').hide();
            $('#content-view').show();
            connectToVoiceServer($('#username').val());
            createAudioContext();
    
            $('#mute-toggle').click(function () {
                isMuted = !isMuted;
                if (isMuted) {
                    $('#mute-toggle').html('<i class="bi bi-mic-mute"></i>');
                } else {
                    $('#mute-toggle').html('<i class="bi bi-mic"></i>');
                }
            });
    
            if (navigator.mediaDevices) {
                setupRecordWorklet();
            } else {
                // TODO: Display warning can not access microphone
            }
        });
    });
    
    function setupRecordWorklet() {
        navigator.mediaDevices.getUserMedia({ audio: true })
            .then(async function (stream) {
                await ctx.audioWorklet.addModule('./js/record-processor.js');
                let src = ctx.createMediaStreamSource(stream);
    
                const processor = new AudioWorkletNode(ctx, 'record-processor');
    
                // view into the SharedArrayBuffer that the record worklet writes into
                let recordBuffer;
                processor.port.onmessage = (e) => {
                    if (e.data.eventType === 'buffer') {
                        recordBuffer = new Float32Array(e.data.buffer);
                    }
                    if (e.data.eventType === 'data' && !isMuted) {
                        // send the chunk the worklet just finished writing to the server
                        socket.volatile.emit('voice', { id: socket.id, buffer: recordBuffer.slice(e.data.start, e.data.end).buffer });
                    }
                }
                src.connect(processor);
            })
            .catch(function (err) {
                console.log('The following error occurred: ' + err);
            });
    
        socket.on('voice', data => {
            if (playbackBuffers[data.id]) {
                // write the received chunk into this user's ring buffer at the current cursor
                // (each chunk is assumed to be a quarter of the shared buffer, i.e. 2048 samples)
                let buffer = new Float32Array(data.buffer);
                playbackBuffers[data.id].buffer.set(buffer, playbackBuffers[data.id].cursor);
                playbackBuffers[data.id].cursor += buffer.length;
                playbackBuffers[data.id].cursor %= playbackBuffers[data.id].buffer.length;
            }
        });
    }
    
    function createAudioContext() {
        ctx = new AudioContext();
    }
    
    function connectToVoiceServer(username) {
        socket = io("wss://example.com", { query: `username=${username}` });
    
        socket.on("connect", function () {
    
        });
    
        socket.on('user:connect', function (user) {
            addUser(user.id, user.username);
        });
    
        socket.on('user:disconnect', function (id) {
            removeUser(id);
        });
    
        socket.on('user:list', function (users) {
            users.forEach(function (user) {
                addUser(user.id, user.username);
            });
        });
    }
    
    function addUser(id, username) {
        $('#user-list').append(`<li id="${id}" class="list-group-item text-truncate">${username}</li>`);
        addUserAudio(id);
    }
    
    function removeUser(id) {
        $('#' + id).remove();
        removeUserAudio(id);
    }
    
    async function addUserAudio(id) {
        await ctx.audioWorklet.addModule('./js/playback-processor.js');
        audioWorkletNodes[id] = new AudioWorkletNode(ctx, 'playback-processor');
    
        audioWorkletNodes[id].port.onmessage = (e) => {
            if (e.data.eventType === 'buffer') {
                playbackBuffers[id] = { cursor: 0, buffer: new Float32Array(e.data.buffer) };
            }
        }
    
        audioWorkletNodes[id].connect(ctx.destination);
    }
    
    function removeUserAudio(id) {
        audioWorkletNodes[id].disconnect();
        audioWorkletNodes[id] = undefined;
        playbackBuffers[id] = undefined;
    }
    
    

    record-processor.js

    class RecordProcessor extends AudioWorkletProcessor {
    
        constructor() {
            super();
            this._cursor = 0;
            this._bufferSize = 8192 * 4; // size in bytes => 8192 Float32 samples
            this._sharedBuffer = new SharedArrayBuffer(this._bufferSize);
            this._sharedView = new Float32Array(this._sharedBuffer);
            this.port.postMessage({
                eventType: 'buffer',
                buffer: this._sharedBuffer
            });
        }
    
        process(inputs, outputs) {

            // no microphone connected yet (or no channels): keep the processor alive
            if (!inputs[0] || !inputs[0][0]) {
                return true;
            }

            // write the incoming 128-sample block into the shared ring buffer
            for (let i = 0; i < inputs[0][0].length; i++) {
                this._sharedView[(i + this._cursor) % this._sharedView.length] = inputs[0][0][i];
            }

            // every quarter of the buffer, tell the main thread which range is ready to send
            if (((this._cursor + inputs[0][0].length) % (this._sharedView.length / 4)) === 0) {
                this.port.postMessage({
                    eventType: 'data',
                    start: this._cursor - this._sharedView.length / 4 + inputs[0][0].length,
                    end: this._cursor + inputs[0][0].length
                });
            }

            this._cursor += inputs[0][0].length;
            this._cursor %= this._sharedView.length;

            return true;
        }
    }
    
    registerProcessor('record-processor', RecordProcessor);
    

    playback-processor.js

    class PlaybackProcessor extends AudioWorkletProcessor {
    
        constructor() {
            super();
            this._cursor = 0;
            this._bufferSize = 8192 * 4; // size in bytes => 8192 Float32 samples
            this._sharedBuffer = new SharedArrayBuffer(this._bufferSize);
            this._sharedView = new Float32Array(this._sharedBuffer);
            this.port.postMessage({
                eventType: 'buffer',
                buffer: this._sharedBuffer
            });
        }
    
        process(inputs, outputs) {

            // copy the next block from the shared ring buffer to the output,
            // zeroing it afterwards so silence is played if no new data arrives
            for (let i = 0; i < outputs[0][0].length; i++) {
                outputs[0][0][i] = this._sharedView[i + this._cursor];
                this._sharedView[i + this._cursor] = 0;
            }

            this._cursor += outputs[0][0].length;
            this._cursor %= this._sharedView.length;

            return true;
        }
    }
    
    registerProcessor('playback-processor', PlaybackProcessor);
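
    For completeness, a minimal sketch of such a forwarding server could look like this (assuming socket.io v4 on Node.js and the port from the question; the user:connect, user:disconnect and user:list events and the username query parameter just mirror what the client code above expects):

    server.js

    const { Server } = require('socket.io');

    const io = new Server(4385, { cors: { origin: '*' } });

    io.on('connection', (socket) => {
        const username = socket.handshake.query.username;

        // tell the newcomer who is already connected, and everyone else about the newcomer
        const users = [...io.of('/').sockets.values()]
            .filter((s) => s.id !== socket.id)
            .map((s) => ({ id: s.id, username: s.handshake.query.username }));
        socket.emit('user:list', users);
        socket.broadcast.emit('user:connect', { id: socket.id, username: username });

        // forward raw audio chunks to every other connected client
        socket.on('voice', (data) => {
            socket.broadcast.emit('voice', data);
        });

        socket.on('disconnect', () => {
            socket.broadcast.emit('user:disconnect', socket.id);
        });
    });

    Because socket.broadcast.emit excludes the sender, a client never gets its own audio back.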
    

    Things to keep in mind:

    1. I'm using SharedArrayBuffers to read/write the AudioWorklet buffers. For them to be available, your server has to serve the page with the headers Cross-Origin-Opener-Policy: same-origin and Cross-Origin-Embedder-Policy: require-corp (see the sketch after this list).
    2. This transfers uncompressed, non-interleaved IEEE 754 32-bit linear PCM audio, so the amount of data sent over the network is huge. Compression has to be added; a simple bit-depth reduction is sketched after this list.
    3. This assumes the sample rate is the same for the sender and the receiver.
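
    For point 1, serving the page with those headers could look like this (assuming Express and a public/ folder containing index.html and the js/ worklet files; any server that can set response headers works the same way):

    const express = require('express');
    const app = express();

    app.use((req, res, next) => {
        // both headers are required for SharedArrayBuffer to be available in the page
        res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
        res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp');
        next();
    });

    app.use(express.static('public'));

    app.listen(8080);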
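
    For point 2, proper compression means an audio codec (Opus, for example), but even quantizing the Float32 samples to 16-bit PCM before emitting halves the payload. A rough sketch (the helper names are made up for illustration):

    // Float32 samples in [-1, 1] -> Int16, half the bytes per chunk
    function floatTo16BitPCM(float32Chunk) {
        const int16 = new Int16Array(float32Chunk.length);
        for (let i = 0; i < float32Chunk.length; i++) {
            const s = Math.max(-1, Math.min(1, float32Chunk[i]));
            int16[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
        }
        return int16;
    }

    // Int16 -> Float32, for writing back into the playback buffer
    function int16ToFloat32(int16Chunk) {
        const float32 = new Float32Array(int16Chunk.length);
        for (let i = 0; i < int16Chunk.length; i++) {
            float32[i] = int16Chunk[i] / (int16Chunk[i] < 0 ? 0x8000 : 0x7FFF);
        }
        return float32;
    }

    On the sending side you would emit floatTo16BitPCM(recordBuffer.slice(e.data.start, e.data.end)).buffer instead of the raw Float32 buffer, and in the 'voice' handler wrap data.buffer in an Int16Array and run it through int16ToFloat32 before the set() call.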