javascript, webrtc, html5-video

How do you initialize a WebRTC call using the transceiver API, but only enable audio and video later, after signaling has completed?


I am trying to first connect two WebRTC peers. Once the connection is established, I want to give the users on both sides the option to enable or disable video and audio. This should happen without triggering the signaling process again.
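
Roughly, the plan is to toggle the tracks on the sender side only, along these lines (setAudioEnabled is just an illustrative helper; I assume replaceTrack(null) is enough to stop sending again):

// toggle audio at the sender without renegotiating
async function setAudioEnabled(audioSender, enabled, localStream) {
  if (enabled) {
    // start (or resume) sending the microphone track
    await audioSender.replaceTrack(localStream.getAudioTracks()[0]);
  } else {
    // stop sending; replaceTrack does not require renegotiation
    await audioSender.replaceTrack(null);
  }
}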

I do run into an issue though: if I call replaceTrack(audioTrack), the remote peer will not play back audio until I also call replaceTrack(videoTrack).

I am unsure why this happens and cannot find any clue in the documentation. Audio does play fine after 10 seconds, once I also attach the video track. Without the video track there is no audio playback. Why?

function createVideoElement() {
  const vid = document.createElement("video");
  vid.width = 320;
  vid.controls = true;
  vid.autoplay = true;
  document.body.appendChild(vid);
  return vid;
}

async function RunTestInit() {

  console.log("get media access");
  const p1_stream_out = await navigator.mediaDevices.getUserMedia({
    video: true,
    audio: true
  });
  const p2_stream_out = await navigator.mediaDevices.getUserMedia({
    video: true,
    audio: true
  });


  console.log("stream setup");
  const p1_stream_in = new MediaStream();
  const p2_stream_in = new MediaStream();

  const p1_video_in = createVideoElement();
  const p2_video_in = createVideoElement();

  console.log("peer setup");
  const p1 = new RTCPeerConnection();
  const p2 = new RTCPeerConnection();
  // create sendrecv transceivers up front without attaching any tracks yet;
  // the actual tracks are supplied later via sender.replaceTrack()
  const p1_tca = p1.addTransceiver("audio", {
    direction: "sendrecv"
  });
  const p1_tcv = p1.addTransceiver("video", {
    direction: "sendrecv"
  });


  p1.onicecandidate = (ev) => {
    p2.addIceCandidate(ev.candidate);
  }
  p2.onicecandidate = (ev) => {
    p1.addIceCandidate(ev.candidate);
  }

  p1.onconnectionstatechange = (ev) => {
    console.log("p1 state: ", p1.connectionState);
  }
  p2.onconnectionstatechange = async (ev) => {
    console.log("p2 state: ", p2.connectionState);
  }

  p1.onnegotiationneeded = () => {
    //triggers once
    console.warn("p1.onnegotiationneeded");
  }

  p2.onnegotiationneeded = () => {
    //should never trigger
    console.warn("p2.onnegotiationneeded");
  }

  p1.ontrack = (ev) => {
    console.log("p1.ontrack", ev);
    p1_stream_in.addTrack(ev.track);
    p1_video_in.srcObject = p1_stream_in;
  }
  p2.ontrack = (ev) => {
    console.log("p2.ontrack", ev);
    p2_stream_in.addTrack(ev.track);
    p2_video_in.srcObject = p2_stream_in;
  }
  console.log("signaling");
  const offer = await p1.createOffer();
  await p1.setLocalDescription(offer);
  await p2.setRemoteDescription(offer);
  // these transceivers were created on p2 by setRemoteDescription and default
  // to "recvonly", so switch them to "sendrecv" before creating the answer
  const p2_tca = p2.getTransceivers()[0];
  const p2_tcv = p2.getTransceivers()[1];

  p2_tca.direction = "sendrecv";
  p2_tcv.direction = "sendrecv";

  const answer = await p2.createAnswer();
  await p2.setLocalDescription(answer);
  await p1.setRemoteDescription(answer);
  console.log("signaling done");

  //send audio from p2 to p1 (direction doesn't matter)
  //after this runs nothing will happen and no audio plays
  setTimeout(async () => {
    await p2_tca.sender.replaceTrack(p2_stream_out.getAudioTracks()[0]);
    console.warn("audio playback should start now but nothing happens");
  }, 1000);

  //audio starts playing once this runs
  setTimeout(async () => {
    //comment this out and the audio never starts
    await p2_tcv.sender.replaceTrack(p2_stream_out.getVideoTracks()[0]);
    console.warn("now audio playback starts");
  }, 10000);
}

function start() {
  setTimeout(async () => {
    console.log("Init test case");
    await RunTestInit();
  }, 1);
}

The same example as a JSFiddle (needs camera and microphone access): https://jsfiddle.net/vnztcx5p/5/

Once audio works, this will cause an echo.


Solution

  • This is a known issue. https://bugs.chromium.org/p/chromium/issues/detail?id=813243 and https://bugs.chromium.org/p/chromium/issues/detail?id=403710 have some background information.

    In a nutshell: the video element expects you to send audio and video data, and these need to be synchronized. But you don't send any video data, and the element still has to fire a loadedmetadata and a resize event first, because that is what the specification says. Hence it will block audio playback indefinitely.
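
    One possible workaround, assuming you do not need audio and video rendered by the same element, is to give each incoming track its own media element, so the audio element never waits for video metadata. A sketch (attachTrack is an illustrative helper, not something from the bug reports):

    function attachTrack(track) {
      // audio tracks go into an <audio> element, video tracks into a <video> element
      const el = document.createElement(track.kind === "audio" ? "audio" : "video");
      el.autoplay = true;
      el.controls = true;
      document.body.appendChild(el);
      // one stream per track, so this element only waits for this track's data
      el.srcObject = new MediaStream([track]);
      return el;
    }

    // replaces the shared p1_stream_in / p2_stream_in handling from the question
    p1.ontrack = (ev) => attachTrack(ev.track);
    p2.ontrack = (ev) => attachTrack(ev.track);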