Search code examples
javascript typescript web-audio-api audiobuffer

How Can I Store AudioBuffer Without Increasing Memory Usage?


I'm working on a web application that processes audio using the Web Audio API.
I need to store AudioBuffer objects for later playback, but I’m concerned about memory usage, especially when dealing with large or multiple audio files.

Here’s a simplified version of what I’m doing:

/**
 * Downloads the audio file at `url`, decodes it, and appends the resulting
 * AudioBuffer to `storedBuffers`.
 *
 * @param {string} url - Address of the audio file to fetch.
 * @returns {Promise<void>} Resolves once the decoded buffer has been stored.
 * @throws {Error} If the server answers with a non-2xx status.
 */
async function loadAudio(url) {
  const response = await fetch(url);
  // fetch() only rejects on network failure; without this check an HTTP
  // error page would be handed to the decoder.
  if (!response.ok) {
    throw new Error(`Failed to load ${url}: HTTP ${response.status}`);
  }
  const arrayBuffer = await response.arrayBuffer();

  // This context is only used for decoding. Close it afterwards so that
  // repeated calls do not pile up live audio contexts.
  const audioContext = new AudioContext();
  try {
    const audioBuffer = await audioContext.decodeAudioData(arrayBuffer);

    // Store the audioBuffer for later use
    storedBuffers.push(audioBuffer);
  } finally {
    await audioContext.close();
  }
}

// Decoded buffers kept for later playback; loadAudio() appends to this array.
const storedBuffers = [];
// The returned promise is not awaited, so loading continues in the background.
loadAudio('path/to/audio/file');

Issues I'm Facing:

  • High Memory Consumption: As I store more AudioBuffer objects, memory usage grows significantly, potentially leading to performance degradation.
  • Memory Management: I'm not sure how to manage or release the memory used by these AudioBuffer objects when they’re no longer needed.

My Questions:

  • How can I store AudioBuffer efficiently without significantly increasing memory usage? Are there techniques or patterns that can help mitigate the memory footprint?
  • Is there a way to compress or serialize an AudioBuffer so that it takes up less memory while stored?
  • What strategies can I use to manage memory when dealing with multiple large AudioBuffer objects?

What I've Considered:

  • Storing the original ArrayBuffer: I thought about storing the original ArrayBuffer instead of the decoded AudioBuffer, but then I would need to re-decode it before playback, which might introduce latency.
  • Using a WeakMap or WeakSet: However, I’m unsure if this would help in this context since I need to actively use the buffers later.

Any advice on how to handle this efficiently would be greatly appreciated!


Solution

  • I made a small app that will possibly give you an idea of how to do what you want. "Possibly", because it is difficult for me to understand exactly what you are doing. Instead of an API I'm loading files from the local drive, but the idea of the cache and lazy loading is the same. A randomizer picks the start offset of the buffer, so playback starts from a different place each time. Finally, you can save 5 seconds from that random place, just for the demo. Until you press Start, nothing is loaded other than the file names, to keep memory free.

    In the folder make two files: index.html and app.js

    index.html

    <!-- File picker; app.js listens for its 'change' event and lists the chosen files. -->
    <input type="file" id="fileInput" multiple accept="audio/mp3">
    <!-- Filled by app.js with one entry per selected file. -->
    <div id="audioContainer"></div>
    <script src="app.js"></script>
    

    app.js

    First, there is a MAX_CACHE_SIZE, which limits how many decoded audio buffers are kept; it helps manage the cache.

    // Upper bound on the number of decoded buffers that manageCache() keeps.
    const MAX_CACHE_SIZE = 5;
    

    Now create an audioContext, as you had before (browser API).

    // Single shared context; uses the webkit-prefixed constructor when AudioContext is not defined.
    const audioContext = new (window.AudioContext || window.webkitAudioContext)();
    

    buffers map to keep audio data once it's loaded.

    // Cache of decoded buffers, keyed by file name.
    const buffers = new Map();
    

    This listens for changes to the file selection.

    // Rebuild the item list whenever the selection in the file input changes.
    document.getElementById('fileInput').addEventListener('change', handleFiles);
    

    On files select it takes those files and prepares audio items with buttons for starting, stopping, and saving.

    /**
     * Rebuilds the list of audio items from the files picked in the file input.
     *
     * The previous content of #audioContainer is discarded. For every selected
     * file one `.audio-item` is appended, holding the file name, a timeline with
     * a start marker, and Start / Stop / Save buttons. No file is read here; the
     * buttons trigger loading on click.
     *
     * @param {Event} event - `change` event whose `target.files` is iterated.
     * @returns {Promise<void>}
     */
    async function handleFiles(event) {
        const selectedFiles = event.target.files;
        const listRoot = document.getElementById('audioContainer');
        listRoot.innerHTML = '';

        // Creates an element carrying a single CSS class.
        const createWithClass = (tagName, className) => {
            const element = document.createElement(tagName);
            element.classList.add(className);
            return element;
        };

        // Creates a labelled button wired to a click handler.
        const createButton = (label, onClick) => {
            const button = document.createElement('button');
            button.textContent = label;
            button.addEventListener('click', onClick);
            return button;
        };

        for (const file of selectedFiles) {
            const audioItem = createWithClass('div', 'audio-item');

            const heading = document.createElement('h3');
            heading.textContent = file.name;

            const timeline = createWithClass('div', 'timeline');
            const startMarker = createWithClass('div', 'start-marker');
            timeline.appendChild(startMarker);

            const controls = [
                createButton('Start', () => loadAndPlayAudio(file, startMarker)),
                createButton('Stop', () => stopAudio()),
                createButton('Save 5 Seconds', () => loadAndSaveSnippet(file)),
            ];

            [heading, timeline, ...controls].forEach((child) => audioItem.appendChild(child));
            listRoot.appendChild(audioItem);
        }
    }
    

    There is a currentSource variable to track what's playing.

    // Buffer source most recently started by loadAndPlayAudio(); reset to null by stopAudio().
    let currentSource = null;
    

    loadAndPlayAudio loads audio from the file. If not already in the buffers, it adds the file data into buffers. Plays a random part of the audio, showing where it starts with startMarker. But you need to change it later to retrieve files from your API.

    /**
     * Plays `file` from a random offset, decoding it first if it is not cached.
     *
     * The decoded buffer is cached under `file.name`. The chosen offset is shown
     * by moving `startMarker` and is remembered on the buffer as `randomStart`
     * (read later by saveAudioSnippet). A source that is already playing is
     * stopped before the new one starts.
     *
     * @param {File} file - Audio file selected by the user.
     * @param {HTMLElement} startMarker - Marker whose `left` style shows the start position.
     * @returns {Promise<void>} Resolves once playback has been started.
     */
    async function loadAndPlayAudio(file, startMarker) {
        // The context is created at page load, before any user gesture, so
        // autoplay policies may leave it suspended. Resume it first, while we
        // are still inside the click that triggered this call.
        if (audioContext.state === 'suspended') {
            await audioContext.resume();
        }

        let audioBuffer = buffers.get(file.name);

        if (!audioBuffer) {
            audioBuffer = await loadAudioBuffer(file);
            buffers.set(file.name, audioBuffer);
            manageCache();
        }

        const randomStart = Math.random() * audioBuffer.duration;
        startMarker.style.left = `${(randomStart / audioBuffer.duration) * 100}%`;
        // Remembered so that "Save 5 Seconds" exports from the same position.
        audioBuffer.randomStart = randomStart;

        stopAudio();
        const source = audioContext.createBufferSource();
        source.buffer = audioBuffer;
        source.connect(audioContext.destination);
        source.start(0, randomStart);
        currentSource = source;

        console.log(`Playing ${file.name} from ${randomStart.toFixed(2)} seconds`);
    }
    

    If you click the save button, loadAndSaveSnippet saves 5 seconds of audio (the browser may ask where to save it).

    /**
     * Exports a snippet of `file`, decoding it first if it is not cached.
     *
     * @param {File} file - Audio file selected by the user; cached under `file.name`.
     * @returns {Promise<void>} Resolves after saveAudioSnippet has finished and
     *     rejects if decoding or exporting fails.
     */
    async function loadAndSaveSnippet(file) {
        let audioBuffer = buffers.get(file.name);

        if (!audioBuffer) {
            audioBuffer = await loadAudioBuffer(file);
            buffers.set(file.name, audioBuffer);
            manageCache();
        }

        // Await the export so that a failure rejects this call instead of
        // being lost in a floating promise.
        await saveAudioSnippet(audioBuffer);
    }
    
    /**
     * Reads `file` into memory and decodes it with the shared audio context.
     *
     * @param {File} file - Audio file to decode.
     * @returns {Promise<AudioBuffer>} The decoded audio.
     */
    async function loadAudioBuffer(file) {
        const encodedBytes = await file.arrayBuffer();
        const decoded = await audioContext.decodeAudioData(encodedBytes);
        return decoded;
    }
    
    /**
     * Stops the source tracked in `currentSource`, if any, and clears it.
     */
    function stopAudio() {
        if (!currentSource) {
            return;
        }

        currentSource.stop();
        currentSource = null;
    }
    

    This is where you manage cache. It removes the oldest audio buffer when there are more than 5.

    /**
     * Evicts the earliest-inserted entry of `buffers` once the cache holds
     * more than MAX_CACHE_SIZE entries. At most one entry is removed per call.
     */
    function manageCache() {
        if (buffers.size <= MAX_CACHE_SIZE) {
            return;
        }

        // A Map iterates in insertion order, so the first key is the oldest.
        const [evictedKey] = buffers.keys();
        buffers.delete(evictedKey);
        console.log(`Removed ${evictedKey} from cache to manage memory`);
    }
    

    To actually save that 5-second piece, saveAudioSnippet grabs the audio snippet, turns it into a WAV file, and then downloads it. (If you want to download it as MP3, you need to convert it yourself.)

    /**
     * Exports up to five seconds of `audioBuffer` as a WAV download.
     *
     * The snippet starts at `audioBuffer.randomStart` (set by loadAndPlayAudio),
     * or at the beginning when the buffer has not been played yet, and is cut
     * short at the end of the audio.
     *
     * @param {AudioBuffer} audioBuffer - Decoded audio to take the snippet from.
     * @returns {Promise<void>} Resolves after the download has been triggered,
     *     or immediately when there is nothing to export.
     */
    async function saveAudioSnippet(audioBuffer) {
        const snippetDuration = 5;
        const { numberOfChannels, sampleRate, length: totalFrames } = audioBuffer;

        // randomStart only exists after the buffer has been played once;
        // without this fallback the length below would be NaN.
        const snippetStart = Number.isFinite(audioBuffer.randomStart) ? audioBuffer.randomStart : 0;

        // Work in whole frames so the new buffer and the copied range always
        // have exactly the same length.
        const startFrame = Math.min(Math.max(0, Math.floor(snippetStart * sampleRate)), totalFrames);
        const endFrame = Math.min(startFrame + Math.round(snippetDuration * sampleRate), totalFrames);
        const frameCount = endFrame - startFrame;

        if (frameCount <= 0) {
            // createBuffer() rejects a zero-length buffer.
            console.warn('Nothing to save: the snippet would be empty');
            return;
        }

        const snippetBuffer = audioContext.createBuffer(numberOfChannels, frameCount, sampleRate);

        for (let channel = 0; channel < numberOfChannels; channel++) {
            // subarray() is a view, so the source samples are copied only once.
            const data = audioBuffer.getChannelData(channel).subarray(startFrame, endFrame);
            snippetBuffer.copyToChannel(data, channel);
        }

        const wavBlob = await bufferToWave(snippetBuffer);
        const url = URL.createObjectURL(wavBlob);
        const anchor = document.createElement('a');
        anchor.href = url;
        anchor.download = `${audioBuffer.name || 'snippet'}.wav`;
        anchor.click();
        URL.revokeObjectURL(url);
    }
    

    And finally, this function converts that audio buffer to a WAV format, handling all the technical bits, and returns it as a Blob for download.

    /**
     * Encodes an AudioBuffer as a 16-bit PCM WAV file.
     *
     * @param {AudioBuffer} audioBuffer - Audio to encode; every channel is written.
     * @returns {Blob} `audio/wav` blob: a 44-byte header followed by the
     *     interleaved little-endian samples.
     */
    function bufferToWave(audioBuffer) {
        const numberOfChannels = audioBuffer.numberOfChannels;
        const length = audioBuffer.length * numberOfChannels * 2 + 44;
        const buffer = new ArrayBuffer(length);
        const view = new DataView(buffer);
        const channels = [];
        let pos = 0;

        function setUint16(data) {
            view.setUint16(pos, data, true);
            pos += 2;
        }

        function setUint32(data) {
            view.setUint32(pos, data, true);
            pos += 4;
        }

        setUint32(0x46464952); // "RIFF"
        setUint32(length - 8); // file size minus the first 8 bytes
        setUint32(0x45564157); // "WAVE"
        setUint32(0x20746d66); // "fmt "
        setUint32(16); // size of the fmt chunk
        setUint16(1); // audio format 1 = PCM
        setUint16(numberOfChannels);
        setUint32(audioBuffer.sampleRate);
        setUint32(audioBuffer.sampleRate * 2 * numberOfChannels); // bytes per second
        setUint16(numberOfChannels * 2); // bytes per frame
        setUint16(16); // bits per sample

        setUint32(0x61746164); // "data"
        setUint32(length - pos - 4); // size of the sample data

        for (let i = 0; i < numberOfChannels; i++) {
            channels.push(audioBuffer.getChannelData(i));
        }

        // Samples are read from frame 0. The 44-byte header offset applies
        // only to the output position, not to the index into the channel data.
        for (let frame = 0; frame < audioBuffer.length; frame++) {
            for (let i = 0; i < numberOfChannels; i++) {
                // Clamp to [-1, 1], then scale to the signed 16-bit range.
                const sample = Math.max(-1, Math.min(1, channels[i][frame]));
                view.setInt16(pos, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
                pos += 2;
            }
        }

        return new Blob([buffer], { type: 'audio/wav' });
    }
    

    When you put these two files together, for testing and modification purposes

    npm install -g http-server and run http-server

    You will get

    http-server
    Starting up http-server, serving ./
    
    http-server version: 14.1.1
    
    http-server settings: 
    CORS: disabled
    Cache: 3600 seconds
    Connection Timeout: 120 seconds
    Directory Listings: visible
    AutoIndex: visible
    Serve GZIP Files: false
    Serve Brotli Files: false
    Default File Extension: none
    
    Available on:
      http://192.168.1.70:8080
      http://127.0.0.1:8080
    Hit CTRL-C to stop the server
    

    Similar to this.

    Now in your browser type http://127.0.0.1:8080 or http://localhost:8080 and you're in the app.

    I'll put this here for easy copy/paste...

    // Upper bound on the number of decoded buffers that manageCache() keeps.
    const MAX_CACHE_SIZE = 5;
    // Single shared context; uses the webkit-prefixed constructor when AudioContext is not defined.
    const audioContext = new(window.AudioContext || window.webkitAudioContext)();
    // Cache of decoded buffers, keyed by file name.
    const buffers = new Map();
    
    // Rebuild the item list whenever the selection in the file input changes.
    document.getElementById('fileInput').addEventListener('change', handleFiles);
    
    /**
     * Rebuilds the list of audio items from the files picked in the file input.
     *
     * The previous content of #audioContainer is discarded. For every selected
     * file one `.audio-item` is appended, holding the file name, a timeline with
     * a start marker, and Start / Stop / Save buttons. No file is read here; the
     * buttons trigger loading on click.
     *
     * @param {Event} event - `change` event whose `target.files` is iterated.
     * @returns {Promise<void>}
     */
    async function handleFiles(event) {
      const selectedFiles = event.target.files;
      const listRoot = document.getElementById('audioContainer');
      listRoot.innerHTML = '';

      // Creates an element carrying a single CSS class.
      const createWithClass = (tagName, className) => {
        const element = document.createElement(tagName);
        element.classList.add(className);
        return element;
      };

      // Creates a labelled button wired to a click handler.
      const createButton = (label, onClick) => {
        const button = document.createElement('button');
        button.textContent = label;
        button.addEventListener('click', onClick);
        return button;
      };

      for (const file of selectedFiles) {
        const audioItem = createWithClass('div', 'audio-item');

        const heading = document.createElement('h3');
        heading.textContent = file.name;

        const timeline = createWithClass('div', 'timeline');
        const startMarker = createWithClass('div', 'start-marker');
        timeline.appendChild(startMarker);

        const controls = [
          createButton('Start', () => loadAndPlayAudio(file, startMarker)),
          createButton('Stop', () => stopAudio()),
          createButton('Save 5 Seconds', () => loadAndSaveSnippet(file)),
        ];

        [heading, timeline, ...controls].forEach((child) => audioItem.appendChild(child));
        listRoot.appendChild(audioItem);
      }
    }
    
    // Buffer source most recently started by loadAndPlayAudio(); reset to null by stopAudio().
    let currentSource = null;
    
    /**
     * Plays `file` from a random offset, decoding it first if it is not cached.
     *
     * The decoded buffer is cached under `file.name`. The chosen offset is shown
     * by moving `startMarker` and is remembered on the buffer as `randomStart`
     * (read later by saveAudioSnippet). A source that is already playing is
     * stopped before the new one starts.
     *
     * @param {File} file - Audio file selected by the user.
     * @param {HTMLElement} startMarker - Marker whose `left` style shows the start position.
     * @returns {Promise<void>} Resolves once playback has been started.
     */
    async function loadAndPlayAudio(file, startMarker) {
      // The context is created at page load, before any user gesture, so
      // autoplay policies may leave it suspended. Resume it first, while we
      // are still inside the click that triggered this call.
      if (audioContext.state === 'suspended') {
        await audioContext.resume();
      }

      let audioBuffer = buffers.get(file.name);

      if (!audioBuffer) {
        audioBuffer = await loadAudioBuffer(file);
        buffers.set(file.name, audioBuffer);
        manageCache();
      }

      const randomStart = Math.random() * audioBuffer.duration;
      startMarker.style.left = `${(randomStart / audioBuffer.duration) * 100}%`;
      // Remembered so that "Save 5 Seconds" exports from the same position.
      audioBuffer.randomStart = randomStart;

      stopAudio();
      const source = audioContext.createBufferSource();
      source.buffer = audioBuffer;
      source.connect(audioContext.destination);
      source.start(0, randomStart);
      currentSource = source;

      console.log(`Playing ${file.name} from ${randomStart.toFixed(2)} seconds`);
    }
    
    /**
     * Exports a snippet of `file`, decoding it first if it is not cached.
     *
     * @param {File} file - Audio file selected by the user; cached under `file.name`.
     * @returns {Promise<void>} Resolves after saveAudioSnippet has finished and
     *     rejects if decoding or exporting fails.
     */
    async function loadAndSaveSnippet(file) {
      let audioBuffer = buffers.get(file.name);

      if (!audioBuffer) {
        audioBuffer = await loadAudioBuffer(file);
        buffers.set(file.name, audioBuffer);
        manageCache();
      }

      // Await the export so that a failure rejects this call instead of
      // being lost in a floating promise.
      await saveAudioSnippet(audioBuffer);
    }
    
    /**
     * Reads `file` into memory and decodes it with the shared audio context.
     *
     * @param {File} file - Audio file to decode.
     * @returns {Promise<AudioBuffer>} The decoded audio.
     */
    async function loadAudioBuffer(file) {
      const encodedBytes = await file.arrayBuffer();
      const decoded = await audioContext.decodeAudioData(encodedBytes);
      return decoded;
    }
    
    /**
     * Stops the source tracked in `currentSource`, if any, and clears it.
     */
    function stopAudio() {
      if (!currentSource) {
        return;
      }

      currentSource.stop();
      currentSource = null;
    }
    
    /**
     * Evicts the earliest-inserted entry of `buffers` once the cache holds
     * more than MAX_CACHE_SIZE entries. At most one entry is removed per call.
     */
    function manageCache() {
      if (buffers.size <= MAX_CACHE_SIZE) {
        return;
      }

      // A Map iterates in insertion order, so the first key is the oldest.
      const [evictedKey] = buffers.keys();
      buffers.delete(evictedKey);
      console.log(`Removed ${evictedKey} from cache to manage memory`);
    }
    
    
    /**
     * Exports up to five seconds of `audioBuffer` as a WAV download.
     *
     * The snippet starts at `audioBuffer.randomStart` (set by loadAndPlayAudio),
     * or at the beginning when the buffer has not been played yet, and is cut
     * short at the end of the audio.
     *
     * @param {AudioBuffer} audioBuffer - Decoded audio to take the snippet from.
     * @returns {Promise<void>} Resolves after the download has been triggered,
     *     or immediately when there is nothing to export.
     */
    async function saveAudioSnippet(audioBuffer) {
      const snippetDuration = 5;
      const { numberOfChannels, sampleRate, length: totalFrames } = audioBuffer;

      // randomStart only exists after the buffer has been played once;
      // without this fallback the length below would be NaN.
      const snippetStart = Number.isFinite(audioBuffer.randomStart) ? audioBuffer.randomStart : 0;

      // Work in whole frames so the new buffer and the copied range always
      // have exactly the same length.
      const startFrame = Math.min(Math.max(0, Math.floor(snippetStart * sampleRate)), totalFrames);
      const endFrame = Math.min(startFrame + Math.round(snippetDuration * sampleRate), totalFrames);
      const frameCount = endFrame - startFrame;

      if (frameCount <= 0) {
        // createBuffer() rejects a zero-length buffer.
        console.warn('Nothing to save: the snippet would be empty');
        return;
      }

      const snippetBuffer = audioContext.createBuffer(numberOfChannels, frameCount, sampleRate);

      for (let channel = 0; channel < numberOfChannels; channel++) {
        // subarray() is a view, so the source samples are copied only once.
        const data = audioBuffer.getChannelData(channel).subarray(startFrame, endFrame);
        snippetBuffer.copyToChannel(data, channel);
      }

      const wavBlob = await bufferToWave(snippetBuffer);
      const url = URL.createObjectURL(wavBlob);
      const anchor = document.createElement('a');
      anchor.href = url;
      anchor.download = `${audioBuffer.name || 'snippet'}.wav`;
      anchor.click();
      URL.revokeObjectURL(url);
    }
    
    /**
     * Encodes an AudioBuffer as a 16-bit PCM WAV file.
     *
     * @param {AudioBuffer} audioBuffer - Audio to encode; every channel is written.
     * @returns {Blob} `audio/wav` blob: a 44-byte header followed by the
     *     interleaved little-endian samples.
     */
    function bufferToWave(audioBuffer) {
      const numberOfChannels = audioBuffer.numberOfChannels;
      const length = audioBuffer.length * numberOfChannels * 2 + 44;
      const buffer = new ArrayBuffer(length);
      const view = new DataView(buffer);
      const channels = [];
      let pos = 0;

      function setUint16(data) {
        view.setUint16(pos, data, true);
        pos += 2;
      }

      function setUint32(data) {
        view.setUint32(pos, data, true);
        pos += 4;
      }

      setUint32(0x46464952); // "RIFF"
      setUint32(length - 8); // file size minus the first 8 bytes
      setUint32(0x45564157); // "WAVE"
      setUint32(0x20746d66); // "fmt "
      setUint32(16); // size of the fmt chunk
      setUint16(1); // audio format 1 = PCM
      setUint16(numberOfChannels);
      setUint32(audioBuffer.sampleRate);
      setUint32(audioBuffer.sampleRate * 2 * numberOfChannels); // bytes per second
      setUint16(numberOfChannels * 2); // bytes per frame
      setUint16(16); // bits per sample

      setUint32(0x61746164); // "data"
      setUint32(length - pos - 4); // size of the sample data

      for (let i = 0; i < numberOfChannels; i++) {
        channels.push(audioBuffer.getChannelData(i));
      }

      // Samples are read from frame 0. The 44-byte header offset applies
      // only to the output position, not to the index into the channel data.
      for (let frame = 0; frame < audioBuffer.length; frame++) {
        for (let i = 0; i < numberOfChannels; i++) {
          // Clamp to [-1, 1], then scale to the signed 16-bit range.
          const sample = Math.max(-1, Math.min(1, channels[i][frame]));
          view.setInt16(pos, sample < 0 ? sample * 0x8000 : sample * 0x7FFF, true);
          pos += 2;
        }
      }

      return new Blob([buffer], {
        type: 'audio/wav'
      });
    }
    /* One entry per selected file (app.js adds this class to each item). */
    .audio-item {
      margin-bottom: 20px;
    }
    
    /* Horizontal bar; positioned so the marker inside can be placed absolutely. */
    .timeline {
      width: 100%;
      height: 20px;
      background-color: #f3f3f3;
      border: 1px solid #ccc;
      position: relative;
    }
    
    /* Thin red line; app.js sets its `left` as a percentage of the timeline width. */
    .start-marker {
      width: 2px;
      height: 100%;
      background-color: #ff0000;
      position: absolute;
      top: 0;
    }
    <!-- File picker; app.js listens for its 'change' event and lists the chosen files. -->
    <input type="file" id="fileInput" multiple accept="audio/mp3">
    <!-- Filled by app.js with one entry per selected file. -->
    <div id="audioContainer"></div>
    <script src="app.js"></script>