Search code examples
javascript, audio, webrtc, pcm

Web Audio API : efficiently play a PCM stream


Here is a problem:

  • My JS application receives raw PCM data (via WebRTC data channel),
  • The sample rate is 88200 (I can easily change it to 44100 on the other end),
  • Data is already properly encoded in 4-byte float [-1, 1] little-endian samples,
  • Data arrives by chunks of 512 samples (512*4 bytes),
  • Data can start arriving at any moment, it can last any time, it may stop, it may resume.
  • The goal is to render a sound.

What I did is:

// Queue of received chunks: each element of this array stores a chunk of 512 samples
var samples = [];
var audioCtx = new AudioContext();
var source = audioCtx.createBufferSource();

// 1-channel, 512-frame buffer declared at the stream's sample rate (88200 Hz)
source.buffer = audioCtx.createBuffer(1, 512, 88200);

// bufferSize is 512 because it is the size of chunks
var scriptNode = audioCtx.createScriptProcessor(512, 1, 1);

// Each audioprocess event consumes at most one queued chunk
scriptNode.onaudioprocess = function(audioProcessingEvent) {
  // play a chunk if there is at least one.
  if (samples.length > 0) {
    audioProcessingEvent.outputBuffer.copyToChannel(samples.shift(), 0, 0);
  }
};

source.connect(scriptNode);
scriptNode.connect(audioCtx.destination);
source.start();

// Queue every message received on an incoming data channel as one chunk
peerConnection.addEventListener("datachannel", function(e) {
  e.channel.onmessage = function(m) {
    // assumes m.data is an ArrayBuffer of 4-byte float samples — TODO confirm the channel's binaryType
    var values = new Float32Array(m.data);
    samples.push(values);
  };
});

There are a few issues:

  • audioProcessingEvent.outputBuffer.sampleRate is always 48000. Apparently it does not depend on the sample rate of the source, and I could not find a way to set it to 88200, 44100 or any other value. As a result, sound is rendered with a delay that constantly grows.
  • ScriptProcessorNode is deprecated.
  • It is a very expensive method in terms of CPU usage.

Thank you in advance for any suggestion!


Solution

  • You want an AudioBuffer.

    You can copy raw PCM data into its channels directly from your TypedArray.
    You can specify its sampleRate, and the AudioContext will take care of the resampling to match the audio card's settings.

    However, beware: 2048 bytes per chunk means that every chunk represents less than 6 ms of audio data at 88.2 kHz. We pass a Float32Array, so each sample takes 4 bytes, and 2048 / 4 / 88200 ≈ 0.0058 s.
    You will probably want to increase this, and to implement some buffering strategy.

    Here is a little demo as a proof of concept storing chunks' data into a buffer Float32Array.

    // --- stream / buffering parameters ---
    const sample_rate = 88200; // Hz
    const min_sample_duration = 2; // sec
    // number of samples that must be buffered to play for at least min_sample_duration
    const min_sample_size = sample_rate * min_sample_duration;

    // delay between two simulated fetches
    const fetching_interval = 100; // ms

    // size of one simulated chunk (you'll probably want this much bigger)
    let chunk_size = 2048; // bytes

    // --- demo UI elements ---
    const btn = document.getElementById( 'btn' );
    const log = document.getElementById( 'log' );
    
    /**
     * Click handler of the "start" button.
     *
     * Creates the AudioContext, starts the simulated fetching of PCM chunks and
     * plays the buffered samples through a gain node. The button is then turned
     * into a "stop" button that halts fetching and playback.
     */
    btn.onclick = () => {
    
      let stopped = false;
      let is_reading = false;
      // id of the pending periodicFetch timer, so that stopping can cancel it
      let fetch_timer = null;
      
      const ctx = new AudioContext();
      // to control output volume
      const gain = ctx.createGain();
      gain.gain.value = 0.01;
      gain.connect( ctx.destination );
      // samples fetched but not played yet; replaced at every new fetch
      let fetched_data = new Float32Array( 0 );
      // keep it accessible so we can stop() it
      let active_node;
    
      // let's begin
      periodicFetch();
    
      // UI update
      btn.textContent = "stop";
      btn.onclick = () => {
        // nothing left to stop on a second click (and close() would reject)
        if( stopped ) { return; }
        stopped = true;
        // cancel the fetch that is still scheduled
        clearTimeout( fetch_timer );
        if( active_node ) { active_node.stop( 0 ); }
        // release the system audio resources used by this context
        ctx.close().catch( err => console.error( err ) );
      };
      window.oninput = handleUIEvents;
    
      /**
       * Our fake fetcher: produces one chunk of noise, appends it to the
       * buffered data and calls itself again every fetching_interval ms
       * until stopped. Starts the reading loop once enough data is buffered.
       */
      function periodicFetch() {
    
        // data from server (here just some noise in [-0.5, 0.5[)
        const noise = Float32Array.from( { length: chunk_size / 4 }, () => Math.random() - 0.5 );
        // we concatenate the data just fetched with what we have already buffered
        fetched_data = concatFloat32Arrays( fetched_data, noise );
        // for demo only
        log.textContent = `buffering: ${ fetched_data.length }/ ${ min_sample_size }`;
    
        if( !stopped ) {
          // do it again
          fetch_timer = setTimeout( periodicFetch, fetching_interval );
        }
        // if we are not actively reading and have fetched enough
        // (>= so this agrees with the guard at the top of readingLoop)
        if( !is_reading && fetched_data.length >= min_sample_size ) {
          readingLoop(); // start reading
        }
      
      }
    
      /**
       * Plays everything buffered so far as a single AudioBuffer, then runs
       * again when that buffer has finished playing. Goes idle when stopped or
       * when less than min_sample_size samples are buffered.
       */
      function readingLoop() {
      
        if( stopped || fetched_data.length < min_sample_size ) {
          is_reading = false;
          return;
        }
        // let the world know we are actively reading
        is_reading = true;
        // create a new AudioBuffer declared at the stream's sample rate
        const aud_buf = ctx.createBuffer( 1, fetched_data.length, sample_rate );
        // copy our fetched data to its first channel
        aud_buf.copyToChannel( fetched_data, 0 );
    
        // clear the buffered data
        fetched_data = new Float32Array( 0 );
        
        // the actual player
        active_node = ctx.createBufferSource();
        active_node.buffer = aud_buf;
        active_node.onended = readingLoop; // in case we buffered enough while playing
        active_node.connect( gain );
        active_node.start( 0 );
    
      }
    
      /**
       * Applies a change made on one of the demo's inputs, dispatching on the
       * name of the input that fired the event.
       */
      function handleUIEvents( evt ) {
    
        const type = evt.target.name;
        const value = evt.target.value;
        switch( type ) {
          case "chunk-size":
            chunk_size = Number( value );
            break;
          case "volume":
            gain.gain.value = Number( value );
            break;
        }
    
      }
    
    };
    
    // helpers
    /**
     * Joins two Float32Arrays into a new one.
     *
     * When one side is missing or empty, a copy of the other side is returned
     * (or that other side itself when it is falsy).
     *
     * @param {Float32Array} arr1 - samples placed first
     * @param {Float32Array} arr2 - samples placed after arr1
     * @returns {Float32Array} a new array holding arr1 followed by arr2
     */
    function concatFloat32Arrays( arr1, arr2 ) {
      const firstIsEmpty = !( arr1 && arr1.length );
      if( firstIsEmpty ) {
        return arr2 ? arr2.slice() : arr2;
      }

      const secondIsEmpty = !( arr2 && arr2.length );
      if( secondIsEmpty ) {
        // arr1 is known to be non-empty at this point
        return arr1.slice();
      }

      const head_length = arr1.length;
      const merged = new Float32Array( head_length + arr2.length );
      merged.set( arr1, 0 );
      merged.set( arr2, head_length );
      return merged;
    }
    /* put every label on its own line */
    label {
      display: block;
    }
    <!-- starts the demo; the script relabels it "stop" once it is running -->
    <button id="btn">start</button>
    <!-- receives the buffering progress text written by the script -->
    <pre id="log"></pre>
    
    <!-- the script's input handler dispatches on each input's name attribute -->
    <div>
    <label>Output volume:<input type="range" name="volume" min="0" max="0.5" step="0.01" value="0.01"></label>
    </div>
    <!-- each value is the size in bytes of one simulated chunk -->
    <div>
    Size of each chunk fetched:
      <label><input type="radio" name="chunk-size" value="2048" checked>2048 bytes (OP's current)</label>
      <label><input type="radio" name="chunk-size" value="35280">35280 bytes (barely enough for 0.1s interval)</label>
      <label><input type="radio" name="chunk-size" value="44100">44100 bytes (enough for 0.1s interval)</label>
    </div>