I'm using OfflineAudioContext to render an input file with effects applied and download the result. The download works and it's really fast, but I've run into a problem with gain: I use an AnalyserNode to signal when the gain should increase or decrease.
This works well when playing the audio through a regular AudioContext, but the offline version shifts the timing of the gain changes very noticeably: both the increases and the decreases start late, as if there were an overall latency shift.
Is there a way to combat this shift? I'm fine with the rendering process taking longer.
var chunks = [];
var fileInput = document.getElementById("input");
var process = document.getElementById("process");

// Load-audio-file click listener
process.addEventListener(
  "click",
  function () {
    // Web Audio
    var audioCtx2 = new (window.AudioContext || window.webkitAudioContext)();

    // Reset buttons and log
    $("#log").empty();
    $("#download_link").addClass("d-none");
    $("#repeat_link").addClass("d-none");

    // Check for file
    if (fileInput.files[0] == undefined) {
      if ($("#upload_err").hasClass("d-none")) {
        $("#upload_err").removeClass("d-none");
      }
      return false;
    }

    var reader1 = new FileReader();
    reader1.onload = function (ev) {
      var tempBuffer = audioCtx2.createBufferSource();

      // Decode audio
      audioCtx2.decodeAudioData(ev.target.result).then(function (buffer) {
        var offlineAudioCtx = new OfflineAudioContext({
          numberOfChannels: 2,
          length: 44100 * buffer.duration,
          sampleRate: 44100,
        });

        // Audio buffer source and effect nodes
        var soundSource = offlineAudioCtx.createBufferSource();
        var analyser2d = offlineAudioCtx.createAnalyser();
        var dgate1 = offlineAudioCtx.createGain();
        var dhpf = offlineAudioCtx.createBiquadFilter();
        var dhum60 = offlineAudioCtx.createBiquadFilter();
        var dcompressor = offlineAudioCtx.createDynamicsCompressor();

        dhpf.type = "highpass";
        dhpf.Q.value = 0.5;
        dhum60.type = "notch";
        dhum60.Q.value = 130;
        dcompressor.knee.setValueAtTime(40, offlineAudioCtx.currentTime);
        dcompressor.attack.setValueAtTime(0.1, offlineAudioCtx.currentTime);
        dcompressor.release.setValueAtTime(0.2, offlineAudioCtx.currentTime);

        var reader2 = new FileReader();
        reader2.onload = function (ev) {
          $("#log").append("<p>Buffering...</p>");
          soundSource.buffer = buffer;

          let context = offlineAudioCtx;

          // Analyser, placed before the effects
          analyser2d = context.createAnalyser();
          analyser2d.fftSize = 2048;
          analyser2d.smoothingTimeConstant = 0.85;
          const sampleBuffer = new Float32Array(analyser2d.fftSize);

          function loop() {
            // Average power (dB) of the current analyser frame
            analyser2d.getFloatTimeDomainData(sampleBuffer);
            let sumOfSquares = 0;
            for (let i = 0; i < sampleBuffer.length; i++) {
              sumOfSquares += sampleBuffer[i] ** 2;
            }
            const avgPowerDecibels = Math.round(
              10 * Math.log10(sumOfSquares / sampleBuffer.length)
            );
            const gainset = avgPowerDecibels > -50 ? 1 : 0;

            // Real-time effect choices
            if (document.getElementById("gate").getAttribute("data-active") === "true") {
              dgate1.gain.setTargetAtTime(gainset, offlineAudioCtx.currentTime, 0.05);
            } else if (document.getElementById("gate").getAttribute("data-active") === "false") {
              dgate1.gain.setTargetAtTime(1, offlineAudioCtx.currentTime, 0.05);
            }
            if (document.getElementById("hpf").getAttribute("data-active") === "true") {
              dhpf.frequency.value = 90;
            } else if (document.getElementById("hpf").getAttribute("data-active") === "false") {
              dhpf.frequency.value = 0;
            }
            if (document.getElementById("hum").getAttribute("data-active") === "true") {
              dhum60.frequency.value = 60;
            } else if (document.getElementById("hum").getAttribute("data-active") === "false") {
              dhum60.frequency.value = 0;
            }
            if (document.getElementById("comp").getAttribute("data-active") === "true") {
              dcompressor.threshold.setValueAtTime(-30, offlineAudioCtx.currentTime);
              dcompressor.ratio.setValueAtTime(3.5, offlineAudioCtx.currentTime);
            } else if (document.getElementById("comp").getAttribute("data-active") === "false") {
              dcompressor.threshold.setValueAtTime(0, offlineAudioCtx.currentTime);
              dcompressor.ratio.setValueAtTime(1, offlineAudioCtx.currentTime);
            }

            requestAnimationFrame(loop);
          }
          loop();

          soundSource
            .connect(analyser2d)
            .connect(dhpf)
            .connect(dhum60)
            .connect(dgate1)
            .connect(dcompressor);
          dcompressor.connect(offlineAudioCtx.destination);

          offlineAudioCtx
            .startRendering()
            .then(function (renderedBuffer) {
              $("#log").append("<p>Rendering new file...</p>");
              console.log("OfflineAudioContext.length = " + offlineAudioCtx.length);
              split(renderedBuffer, offlineAudioCtx.length);
              $("#log").append("<p>Finished!</p>");
            })
            .catch(function (err) {
              $("#log").append("<p>Rendering failed.</p>");
            });

          soundSource.loop = false;
        };
        reader2.readAsArrayBuffer(fileInput.files[0]);
        soundSource.start(0);
      });
    };
    reader1.readAsArrayBuffer(fileInput.files[0]);
  },
  false
);
I've included what I believe is the relevant portion of the code. Let me know if more is needed. Thanks!
This doesn't work because the OfflineAudioContext runs faster (sometimes hundreds of times faster) than realtime. Your loop that grabs the analyser data runs every 16 ms or so. In a realtime system that might be accurate enough, but the offline context can run much, much faster than realtime, so by the time you've grabbed the analyser data, far more audio time has passed. (You can see this by printing out the context's currentTime in the loop; it will probably increment by much more than 16 ms per iteration.)
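For example, a minimal sketch of that check, reusing the offlineAudioCtx and loop from your code:

// Log how much audio time the offline context renders between
// animation frames; expect jumps far larger than ~16 ms.
let lastTime = 0;
function loop() {
  const now = offlineAudioCtx.currentTime;
  console.log("advanced " + ((now - lastTime) * 1000).toFixed(1) + " ms");
  lastTime = now;
  // ... existing analyser reads and gain scheduling ...
  requestAnimationFrame(loop);
}
loop();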
The best way to do this is to use context.suspend(t) to suspend the context at known times so you can grab the analyser data synchronously. Note that the time is rounded, so it might not be exactly the time you want, but it's perhaps close enough. Note also that, AFAIK, Firefox has not implemented this, and neither has Safari (but will soon).
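To get a feel for the rounding: a render quantum is 128 frames, so at 44100 Hz suspend times land on multiples of 128/44100 ≈ 2.9 ms. A hypothetical helper (not part of the API) that predicts where a suspend will actually land, assuming the round-up behavior described above:

// Round a requested suspend time up to the next 128-frame boundary.
function quantizedSuspendTime(t, sampleRate) {
  const frame = Math.ceil((t * sampleRate) / 128) * 128;
  return frame / sampleRate;
}
console.log(quantizedSuspendTime(0.016, 44100)); // ≈ 0.01741 s, not 0.016 s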
Here's a short snippet showing how to use suspend. There's more than one way to do this, though. Untested, but I think the general idea is correct:
// Grab the data every 16 ms, roughly. `suspend` rounds the time up to
// the nearest multiple of 128 frames (or 128/context.sampleRate time).
for (let t = 0; t < buffer.duration; t += 0.016) {
  context.suspend(t)
    .then(() => {
      // Use the analyser to grab the data and compute the values.
      // Use setValueAtTime and friends to adjust the gain and compressor
      // appropriately. Use context.currentTime to know at what time
      // the context was suspended, and schedule the modifications to be
      // at least 128 samples in the future. (I think.)
    })
    .then(() => context.resume());
}
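Applied to your code, it might look roughly like this (an untested sketch; it assumes the buffer, analyser2d, dgate1, and sampleBuffer from your question, and all suspends are scheduled before startRendering is called):

const quantum = 128 / offlineAudioCtx.sampleRate;
for (let t = 0.016; t < buffer.duration; t += 0.016) {
  offlineAudioCtx.suspend(t).then(() => {
    // The graph is frozen here, so this read is synchronized with the
    // render position rather than with wall-clock time.
    analyser2d.getFloatTimeDomainData(sampleBuffer);
    let sumOfSquares = 0;
    for (let i = 0; i < sampleBuffer.length; i++) {
      sumOfSquares += sampleBuffer[i] ** 2;
    }
    const avgPowerDecibels = 10 * Math.log10(sumOfSquares / sampleBuffer.length);
    const gainset = avgPowerDecibels > -50 ? 1 : 0;
    // Schedule the change at least one render quantum after the
    // suspended time so it takes effect once rendering resumes.
    dgate1.gain.setTargetAtTime(gainset, offlineAudioCtx.currentTime + quantum, 0.05);
    return offlineAudioCtx.resume();
  });
}
offlineAudioCtx.startRendering().then(/* ...as before... */);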
An alternative is to create a realtime context and process the audio as you do now, and when the buffer has finished playing, close the context. Of course, you'll have to add something (a ScriptProcessorNode, an AudioWorkletNode, or a MediaRecorder) to capture the rendered data.
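With MediaRecorder, the capture part could look roughly like this (a sketch, assuming the same node chain from your question but built on a realtime AudioContext):

const audioCtx = new AudioContext();
// ...create soundSource, dhpf, dhum60, dgate1, dcompressor on audioCtx...
const recDest = audioCtx.createMediaStreamDestination();
dcompressor.connect(audioCtx.destination); // audible monitoring (optional)
dcompressor.connect(recDest);              // feeds the recorder
const recorder = new MediaRecorder(recDest.stream);
const recChunks = [];
recorder.ondataavailable = (e) => recChunks.push(e.data);
recorder.onstop = () => {
  const blob = new Blob(recChunks, { type: recorder.mimeType });
  // ...turn the blob into a download link...
};
recorder.start();
soundSource.start(0);
soundSource.onended = () => {
  recorder.stop();
  audioCtx.close();
};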
If none of these work for you, then I'm not sure what the alternatives would be.