javascript webrtc tensorflow.js face-api

Need recomendation about code, because it kill my programm

) So let it start. I want to realize next idea: i want to connect with some other user on different computer using webrtc(exchange video and audio data) and then recignize his emotions. So in this project i use node-webrtc addon(here is examples). So i`ve downloaded examples and tested video-compositing example and all works fine. Here is result of testing

The next part is to i recognize face emotions. For this task i use face-api.js. I have tested this nice video. I will not attach photo, because now im using ubuntu, but tested it on windows, just believe me just all works fine too. So this is time to unite both modules.

As main project i use node-webrtc examples, all subsequent explanations will be around this module. So to run result you should copy weights folder from face-api into node-webrtc/examples/video-compositing folder, and then just replace code below instead of node-webrtc/example/video-compositing/server.js.

'use strict';

require('@tensorflow/tfjs-node');
const tf = require('@tensorflow/tfjs');
const nodeFetch = require('node-fetch');
const fapi = require('face-api.js');
const path = require('path');
const { createCanvas, createImageData } = require('canvas');
const { RTCVideoSink, RTCVideoSource, i420ToRgba, rgbaToI420 } = require('wrtc').nonstandard;


fapi.env.monkeyPatch({ fetch: nodeFetch });
const MODELS_URL = path.join(__dirname, '/weights');

const width = 640;
const height = 480;

Promise.all([
  fapi.nets.tinyFaceDetector.loadFromDisk(MODELS_URL),
  fapi.nets.faceLandmark68Net.loadFromDisk(MODELS_URL),
  fapi.nets.faceRecognitionNet.loadFromDisk(MODELS_URL),
  fapi.nets.faceExpressionNet.loadFromDisk(MODELS_URL)
]);

function beforeOffer(peerConnection) {
  const source = new RTCVideoSource();
  const track = source.createTrack();
  const transceiver = peerConnection.addTransceiver(track);
  const sink = new RTCVideoSink(transceiver.receiver.track);

  let lastFrame = null;

  function onFrame({ frame }) {
    lastFrame = frame;
  }

  sink.addEventListener('frame', onFrame);

  // TODO(mroberts): Is pixelFormat really necessary?
  const canvas = createCanvas(width, height);
  const context = canvas.getContext('2d', { pixelFormat: 'RGBA24' });
  context.fillStyle = 'white';
  context.fillRect(0, 0, width, height);

  let emotion = '';
  const interval = setInterval(() => {
    if (lastFrame) {
      const lastFrameCanvas = createCanvas(lastFrame.width,  lastFrame.height);
      const lastFrameContext = lastFrameCanvas.getContext('2d', { pixelFormat: 'RGBA24' });

      const rgba = new Uint8ClampedArray(lastFrame.width *  lastFrame.height * 4);
      const rgbaFrame = createImageData(rgba, lastFrame.width, lastFrame.height);
      i420ToRgba(lastFrame, rgbaFrame);

      lastFrameContext.putImageData(rgbaFrame, 0, 0);
      context.drawImage(lastFrameCanvas, 0, 0);

      const emotionsArr = { 0: 'neutral', 1: 'happy', 2: 'sad', 3: 'angry', 4: 'fearful', 5: 'disgusted', 6: 'surprised' };

      async function detectEmotion() {
        let frameTensor3D = tf.browser.fromPixels(lastFrameCanvas)
        let face = await fapi.detectSingleFace(frameTensor3D, new fapi.TinyFaceDetectorOptions()).withFaceExpressions();
        //console.log(face);
        function getEmotion(face) {
          try {
            let mostLikelyEmotion = emotionsArr[0];
            let predictionArruracy =  face.expressions[emotionsArr[0]];

            for (let i = 0; i < Object.keys(face.expressions).length; i++) {
              if (face.expressions[emotionsArr[i]] > predictionArruracy && face.expressions[emotionsArr[i]] < 1 ){
                mostLikelyEmotion = emotionsArr[i];
                predictionArruracy = face.expressions[emotionsArr[i]];
              }
            }

            return mostLikelyEmotion;
          }
          catch (e){
            return '';
          }
        }
        let emot = getEmotion(face);
        return emot;
      }


      detectEmotion().then(function(res) {
        emotion = res;
      });

    } else {
      context.fillStyle = 'rgba(255, 255, 255, 0.025)';
      context.fillRect(0, 0, width, height);
    }

    if (emotion != ''){
      context.font = '60px Sans-serif';
      context.strokeStyle = 'black';
      context.lineWidth = 1;
      context.fillStyle = `rgba(${Math.round(255)}, ${Math.round(255)}, ${Math.round(255)}, 1)`;
      context.textAlign = 'center';
      context.save();
      context.translate(width / 2, height);
      context.strokeText(emotion, 0, 0);
      context.fillText(emotion, 0, 0);
      context.restore();
    }


    const rgbaFrame = context.getImageData(0, 0, width, height);
    const i420Frame = {
      width,
      height,
      data: new Uint8ClampedArray(1.5 * width * height)
    };
    rgbaToI420(rgbaFrame, i420Frame);
    source.onFrame(i420Frame);
  });

  const { close } = peerConnection;
  peerConnection.close = function() {
    clearInterval(interval);
    sink.stop();
    track.stop();
    return close.apply(this, arguments);
  };
}

module.exports = { beforeOffer };

And here is results1, result2 and result3 , all works fine))... Well, no, after 2-3 minutes my computer just stop do anything, i even cant move my mouse and then i get error "Killed" in terminal. I read about this error here and because i changed only one script in project i suspect that somewhere in my code i have data leak and my RAM is filling up over time. Can someone help me with this issue? Why programm ends with killing process? If someone want to test it for himself i`ll leave package json to easy install all requirements.

{
  "name": "node-webrtc-examples",
  "version": "0.1.0",
  "description": "This project presents a few example applications using node-webrtc.",
  "private": true,
  "main": "index.js",
  "scripts": {
    "lint": "eslint index.js examples lib test",
    "start": "node index.js",
    "test": "npm run test:unit && npm run test:integration",
    "test:unit": "tape 'test/unit/**/*.js'",
    "test:integration": "tape 'test/integration/**/*.js'"
  },
  "keywords": [
    "Web",
    "Audio"
  ],
  "author": "Mark Andrus Roberts <[email protected]>",
  "license": "BSD-3-Clause",
  "dependencies": {
    "@tensorflow/tfjs": "^1.2.9",
    "@tensorflow/tfjs-core": "^1.2.9",
    "@tensorflow/tfjs-node": "^1.2.9",
    "Scope": "github:kevincennis/Scope",
    "body-parser": "^1.18.3",
    "browserify-middleware": "^8.1.1",
    "canvas": "^2.6.0",
    "color-space": "^1.16.0",
    "express": "^4.16.4",
    "face-api.js": "^0.21.0",
    "node-fetch": "^2.3.0",
    "uuid": "^3.3.2",
    "wrtc": "^0.4.1"
  },
  "devDependencies": {
    "eslint": "^5.15.1",
    "tape": "^4.10.0"
  }
}

If you have error like "someFunction is not a function" or something like thatt it meight be probably because you need to install @tensorflow/tfjs-core, tfjs and tfjs-node 1.2.9 version. Like npm i @tensorflow/[email protected]. For all 3 packages. Thx for yours answers and understanding))

Solution

i working with faceapi.js and tensorflow.js during this year, I tested your code and its ok, but increased my RAM to 2GB in less one minute, you have a memory leak, when use a Tensor do you should release memory? How do you do?

However you should use --inspect arg in node to inspect memory leak

Only call:

  frameTensor3D.dispose();

I refactoring your code and share with you, i hope help you:

    "use strict";

require("@tensorflow/tfjs-node");
const tf = require("@tensorflow/tfjs");
const nodeFetch = require("node-fetch");
const fapi = require("face-api.js");
const path = require("path");
const { createCanvas, createImageData } = require("canvas");
const {
  RTCVideoSink,
  RTCVideoSource,
  i420ToRgba,
  rgbaToI420
} = require("wrtc").nonstandard;

fapi.env.monkeyPatch({ fetch: nodeFetch });
const MODELS_URL = path.join(__dirname, "/weights");

const width = 640;
const height = 480;

Promise.all([
  fapi.nets.tinyFaceDetector.loadFromDisk(MODELS_URL),
  fapi.nets.faceLandmark68Net.loadFromDisk(MODELS_URL),
  fapi.nets.faceRecognitionNet.loadFromDisk(MODELS_URL),
  fapi.nets.faceExpressionNet.loadFromDisk(MODELS_URL)
]);

function beforeOffer(peerConnection) {
  const source = new RTCVideoSource();
  const track = source.createTrack();
  const transceiver = peerConnection.addTransceiver(track);
  const sink = new RTCVideoSink(transceiver.receiver.track);

  let lastFrame = null;

  function onFrame({ frame }) {
    lastFrame = frame;
  }

  sink.addEventListener("frame", onFrame);

  // TODO(mroberts): Is pixelFormat really necessary?
  const canvas = createCanvas(width, height);
  const context = canvas.getContext("2d", { pixelFormat: "RGBA24" });
  context.fillStyle = "white";
  context.fillRect(0, 0, width, height);
  const emotionsArr = {
    0: "neutral",
    1: "happy",
    2: "sad",
    3: "angry",
    4: "fearful",
    5: "disgusted",
    6: "surprised"
  };
  async function detectEmotion(lastFrameCanvas) {
    const frameTensor3D = tf.browser.fromPixels(lastFrameCanvas);
    const face = await fapi
      .detectSingleFace(
        frameTensor3D,
        new fapi.TinyFaceDetectorOptions({ inputSize: 160 })
      )
      .withFaceExpressions();
    //console.log(face);
    const emo = getEmotion(face);
    frameTensor3D.dispose();
    return emo;
  }
  function getEmotion(face) {
    try {
      let mostLikelyEmotion = emotionsArr[0];
      let predictionArruracy = face.expressions[emotionsArr[0]];

      for (let i = 0; i < Object.keys(face.expressions).length; i++) {
        if (
          face.expressions[emotionsArr[i]] > predictionArruracy &&
          face.expressions[emotionsArr[i]] < 1
        ) {
          mostLikelyEmotion = emotionsArr[i];
          predictionArruracy = face.expressions[emotionsArr[i]];
        }
      }
      //console.log(mostLikelyEmotion);
      return mostLikelyEmotion;
    } catch (e) {
      return "";
    }
  }
  let emotion = "";
  const interval = setInterval(() => {
    if (lastFrame) {
      const lastFrameCanvas = createCanvas(lastFrame.width, lastFrame.height);
      const lastFrameContext = lastFrameCanvas.getContext("2d", {
        pixelFormat: "RGBA24"
      });

      const rgba = new Uint8ClampedArray(
        lastFrame.width * lastFrame.height * 4
      );
      const rgbaFrame = createImageData(
        rgba,
        lastFrame.width,
        lastFrame.height
      );
      i420ToRgba(lastFrame, rgbaFrame);

      lastFrameContext.putImageData(rgbaFrame, 0, 0);
      context.drawImage(lastFrameCanvas, 0, 0);

      detectEmotion(lastFrameCanvas).then(function(res) {
        emotion = res;
      });
    } else {
      context.fillStyle = "rgba(255, 255, 255, 0.025)";
      context.fillRect(0, 0, width, height);
    }

    if (emotion != "") {
      context.font = "60px Sans-serif";
      context.strokeStyle = "black";
      context.lineWidth = 1;
      context.fillStyle = `rgba(${Math.round(255)}, ${Math.round(
        255
      )}, ${Math.round(255)}, 1)`;
      context.textAlign = "center";
      context.save();
      context.translate(width / 2, height);
      context.strokeText(emotion, 0, 0);
      context.fillText(emotion, 0, 0);
      context.restore();
    }

    const rgbaFrame = context.getImageData(0, 0, width, height);
    const i420Frame = {
      width,
      height,
      data: new Uint8ClampedArray(1.5 * width * height)
    };
    rgbaToI420(rgbaFrame, i420Frame);
    source.onFrame(i420Frame);
  });

  const { close } = peerConnection;
  peerConnection.close = function() {
    clearInterval(interval);
    sink.stop();
    track.stop();
    return close.apply(this, arguments);
  };
}

module.exports = { beforeOffer };

Sorry my English, good luck