Search code examples
opencvimage-processingprocessingvideo-processingface-detection

How to determine the distance between upper lip and lower lip by using webcam in Processing?


Where should I start? I can see plenty of face recognition and analysis examples using Python and JavaScript, but how about Processing?

I want to determine the distance by using 2 points between upper and lower lip at their highest and lowest point via webcam to use it in further project.

any help would be appreciated


Solution

  • If you want to do it in Processing alone you can use Greg Borenstein's OpenCV for Processing library:

    1. You can start with the Face Detection example
    2. Once you detect a face, you can detect a mouth within the face rectangle using OpenCV.CASCADE_MOUTH.
    3. Once you have mouth detected maybe you can get away with using the mouth bounding box height. For more detail you use OpenCV to threshold that rectangle. Hopefully the open mouth will segment nicely from the rest of the skin. Finding contours should give you lists of points you can work with.

    For something a lot more exact, you can use Jason Saragih's CLM FaceTracker, which is available as an OpenFrameworks addon. OpenFrameworks has similarities to Processing. If you do need this sort of accuracy in Processing you can run FaceOSC in the background and read the mouth coordinates in Processing using oscP5

    Update

    For the first option, using HAAR cascade classifiers, it turns out there are a couple of issues:

    1. The OpenCV Processing library can load one cascade and a second instance will override the first.
    2. The OpenCV.CASCADE_MOUTH seems to work better for closed mouths, but not very well with open mouths

    To get past the 1st issue, you can use the OpenCV Java API directly, bypassing OpenCV Processing for multiple cascade detection.

    There are a couple of parameters that can help the detection, such as having an idea of the bounding box of the mouth beforehand to pass as a hint to the classifier. I've done a basic test using a webcam on my laptop and measured the bounding box for face and mouth at various distances. Here's an example:

    import gab.opencv.*;
    import org.opencv.core.*;
    import org.opencv.objdetect.*;
    
    import processing.video.*;
    
    Capture video;
    OpenCV opencv;
    
    CascadeClassifier faceDetector,mouthDetector;
    MatOfRect faceDetections,mouthDetections;
    
    //cascade detections parameters - explanations from Mastering OpenCV with Practical Computer Vision Projects
    int flags = Objdetect.CASCADE_FIND_BIGGEST_OBJECT;
    // Smallest object size.
    Size minFeatureSizeFace = new Size(50,60);
    Size maxFeatureSizeFace = new Size(125,150);
    Size minFeatureSizeMouth = new Size(30,10);
    Size maxFeatureSizeMouth = new Size(120,60);
    
    // How detailed should the search be. Must be larger than 1.0.
    float searchScaleFactor = 1.1f;
    // How much the detections should be filtered out. This should depend on how bad false detections are to your system.
    // minNeighbors=2 means lots of good+bad detections, and minNeighbors=6 means only good detections are given but some are missed.
    int minNeighbors = 4;
    //laptop webcam face rectangle
    //far, small scale, ~50,60px
    //typing distance, ~83,91px
    //really close, ~125,150
    //laptop webcam mouth rectangle
    //far, small scale, ~30,10
    //typing distance, ~50,25px
    //really close, ~120,60
    
    int mouthHeightHistory = 30;
    int[] mouthHeights = new int[mouthHeightHistory]; 
    
    void setup() {
      opencv = new OpenCV(this,320,240);
      size(opencv.width, opencv.height);
      noFill();
      frameRate(30);
    
      video = new Capture(this,width,height);
      video.start();
    
      faceDetector = new CascadeClassifier(dataPath("haarcascade_frontalface_alt2.xml"));
      mouthDetector = new CascadeClassifier(dataPath("haarcascade_mcs_mouth.xml"));
    
    }
    
    void draw() {
      //feed cam image to OpenCV, it turns it to grayscale
      opencv.loadImage(video);
      opencv.equalizeHistogram();
      image(opencv.getOutput(), 0, 0 );
    
      //detect face using raw Java OpenCV API
      Mat equalizedImg = opencv.getGray();
      faceDetections = new MatOfRect();
      faceDetector.detectMultiScale(equalizedImg, faceDetections, searchScaleFactor, minNeighbors, flags, minFeatureSizeFace, maxFeatureSizeFace);
      Rect[] faceDetectionResults = faceDetections.toArray();
      int faces = faceDetectionResults.length;
      text("detected faces: "+faces,5,15);
      if(faces >= 1){
        Rect face = faceDetectionResults[0];
        stroke(0,192,0);
        rect(face.x,face.y,face.width,face.height);
        //detect mouth - only within face rectangle, not the whole frame
        Rect faceLower = face.clone();
        faceLower.height = (int) (face.height * 0.65);
        faceLower.y = face.y + faceLower.height; 
        Mat faceROI = equalizedImg.submat(faceLower);
        //debug view of ROI
        PImage faceImg = createImage(faceLower.width,faceLower.height,RGB);
        opencv.toPImage(faceROI,faceImg);
        image(faceImg,width-faceImg.width,0);
    
        mouthDetections = new MatOfRect();
        mouthDetector.detectMultiScale(faceROI, mouthDetections, searchScaleFactor, minNeighbors, flags, minFeatureSizeMouth, maxFeatureSizeMouth);
        Rect[] mouthDetectionResults = mouthDetections.toArray();
        int mouths = mouthDetectionResults.length;
        text("detected mouths: "+mouths,5,25);
        if(mouths >= 1){
          Rect mouth = mouthDetectionResults[0];
          stroke(192,0,0);
          rect(faceLower.x + mouth.x,faceLower.y + mouth.y,mouth.width,mouth.height);
          text("mouth height:"+mouth.height+"~px",5,35);
          updateAndPlotMouthHistory(mouth.height);
        }
      }
    }
    // Pushes the newest mouth height into the history buffer (newest at
    // index 0) and renders the buffer as a simple bar graph from y=45 down.
    void updateAndPlotMouthHistory(int newHeight){
      // Shift existing samples one slot to the right, dropping the oldest,
      // then store the fresh sample at the front.
      System.arraycopy(mouthHeights, 0, mouthHeights, 1, mouthHeightHistory - 1);
      mouthHeights[0] = newHeight;
      // Plot: one bar per sample across a fixed 100px-wide strip.
      float graphWidth = 100.0;
      float barWidth = graphWidth / mouthHeightHistory;
      for(int sample = 0; sample < mouthHeightHistory; sample++){
        rect(barWidth * sample,45,barWidth,mouthHeights[sample]);
      }
    }
    // Called by the Processing video library whenever a new camera frame is ready.
    void captureEvent(Capture c) {
      c.read();
    }
    

    One very important note to make: I've copied cascade xml files from the OpenCV Processing library folder (~/Documents/Processing/libraries/opencv_processing/library/cascade-files) to the sketch's data folder. My sketch is OpenCVMouthOpen, so the folder structure looks like this:

    OpenCVMouthOpen
    ├── OpenCVMouthOpen.pde
    └── data
        ├── haarcascade_frontalface_alt.xml
        ├── haarcascade_frontalface_alt2.xml
        ├── haarcascade_frontalface_alt_tree.xml
        ├── haarcascade_frontalface_default.xml
        ├── haarcascade_mcs_mouth.xml
        └── lbpcascade_frontalface.xml
    

    If you don't copy the cascades files and use the code as it is you won't get any errors, but the detection simply won't work. If you want to check, you can do

    println(faceDetector.empty())
    

    at the end of the setup() function and if you get false, the cascade has been loaded and if you get true, the cascade hasn't been loaded.

    You may need to play with the minFeatureSize and maxFeatureSize values for face and mouth for your setup. The second issue, cascade not detecting wide open mouth very well is tricky. There might be an already trained cascade for open mouths, but you'd need to find it. Otherwise, with this method you may need to train one yourself and that can be a bit tedious.

    Nevertheless, notice that there is an upside down plot drawn on the left when a mouth is detected. In my tests I noticed that the height isn't super accurate, but there are noticeable changes in the graph. You may not be able to get a steady mouth height, but by comparing current to averaged previous height values you should see some peaks (values going from positive to negative or vice-versa) which give you an idea of a mouth open/close change.

    Although searching through the whole image for a mouth, as opposed to searching a face only, can be a bit slower and less accurate, it's a simpler setup. If you can get away with less accuracy and more false positives in your project, this could be simpler:

    import gab.opencv.*;
    import java.awt.Rectangle;
    import org.opencv.objdetect.Objdetect;
    import processing.video.*;
    
    Capture video;
    OpenCV opencv;
    // Detection results. Note: `faces` is declared but never assigned in this sketch.
    Rectangle[] faces,mouths;
    
    //cascade detections parameters - explanations from Mastering OpenCV with Practical Computer Vision Projects
    int flags = Objdetect.CASCADE_FIND_BIGGEST_OBJECT;
    // Smallest object size.
    int minFeatureSize = 20;
    // Largest object size.
    int maxFeatureSize = 150;
    // How detailed should the search be. Must be larger than 1.0.
    float searchScaleFactor = 1.1f;
    // How much the detections should be filtered out. This should depend on how bad false detections are to your system.
    // minNeighbors=2 means lots of good+bad detections, and minNeighbors=6 means only good detections are given but some are missed.
    int minNeighbors = 6;
    
    void setup() {
      size(320, 240);
      noFill();
      stroke(0, 192, 0);
      strokeWeight(3);
    
      video = new Capture(this,width,height);
      video.start();
    
      // Single-cascade setup via the Processing wrapper: only the bundled
      // mouth cascade is loaded, and it scans the whole frame (simpler, but
      // expect more false positives than the face-then-mouth approach).
      opencv  = new OpenCV(this,320,240);
      opencv.loadCascade(OpenCV.CASCADE_MOUTH);
    }
    
    // Per-frame pipeline: grab frame -> equalize -> run the mouth cascade on
    // the whole frame -> draw every detection with its bounding-box values.
    void draw() {
      //feed cam image to OpenCV, it turns it to grayscale
      opencv.loadImage(video);
      opencv.equalizeHistogram();
      image(opencv.getOutput(), 0, 0 );
    
      //FIX: assign the field instead of declaring a local Rectangle[] that
      //shadowed it (the field would otherwise stay null forever)
      mouths = opencv.detect(searchScaleFactor,minNeighbors,flags,minFeatureSize, maxFeatureSize);
      for (int i = 0; i < mouths.length; i++) {
        //label each detection with its x,y,width,height and outline it
        text(mouths[i].x + "," + mouths[i].y + "," + mouths[i].width + "," + mouths[i].height,mouths[i].x, mouths[i].y);
        rect(mouths[i].x, mouths[i].y, mouths[i].width, mouths[i].height);
      }
    }
    // Called by the Processing video library whenever a new camera frame is ready.
    void captureEvent(Capture c) {
      c.read();
    }
    

    I was mentioning segmenting/thresholding as well. Here's a rough example that applies just a basic threshold to the lower part of a detected face, then some basic morphological filters (erode/dilate) to clean up the thresholded image a bit:

    import gab.opencv.*;
    import org.opencv.core.*;
    import org.opencv.objdetect.*;
    import org.opencv.imgproc.Imgproc;
    import java.awt.Rectangle;
    import java.util.*;
    
    import processing.video.*;
    
    Capture video;
    OpenCV opencv;
    
    // Raw OpenCV Java API classifiers (loaded in setup(); this variant only
    // uses the face one and segments the mouth region by thresholding).
    CascadeClassifier faceDetector,mouthDetector;
    MatOfRect faceDetections,mouthDetections;
    
    //cascade detections parameters - explanations from Mastering OpenCV with Practical Computer Vision Projects
    int flags = Objdetect.CASCADE_FIND_BIGGEST_OBJECT;
    // Smallest object size.
    Size minFeatureSizeFace = new Size(50,60);
    // Largest object size.
    Size maxFeatureSizeFace = new Size(125,150);
    
    // How detailed should the search be. Must be larger than 1.0.
    float searchScaleFactor = 1.1f;
    // How much the detections should be filtered out. This should depend on how bad false detections are to your system.
    // minNeighbors=2 means lots of good+bad detections, and minNeighbors=6 means only good detections are given but some are missed.
    int minNeighbors = 4;
    //laptop webcam face rectangle
    //far, small scale, ~50,60px
    //typing distance, ~83,91px
    //really close, ~125,150
    
    // Segmentation parameters, tuned at runtime via mouseDragged().
    float threshold = 160;
    int erodeAmt = 1;
    int dilateAmt = 5;
    
    void setup() {
      // Create the OpenCV wrapper first so its width/height can size the window.
      opencv = new OpenCV(this,320,240);
      size(opencv.width, opencv.height);
      noFill();
    
      video = new Capture(this,width,height);
      video.start();
    
      // Cascade xml files must live in the sketch's data folder, otherwise
      // loading fails silently. NOTE(review): mouthDetector is loaded here
      // but never used by this sketch's draw() — the mouth region is found
      // by thresholding instead.
      faceDetector = new CascadeClassifier(dataPath("haarcascade_frontalface_alt2.xml"));
      mouthDetector = new CascadeClassifier(dataPath("haarcascade_mcs_mouth.xml"));
    
    }
    
    // Per-frame pipeline: grab frame -> equalize -> detect face -> threshold the
    // lower part of the face -> erode/dilate -> find and draw contours.
    void draw() {
      //feed cam image to OpenCV, it turns it to grayscale
      opencv.loadImage(video);
    
      opencv.equalizeHistogram();
      image(opencv.getOutput(), 0, 0 );
    
      //detect face using raw Java OpenCV API
      Mat equalizedImg = opencv.getGray();
      faceDetections = new MatOfRect();
      faceDetector.detectMultiScale(equalizedImg, faceDetections, searchScaleFactor, minNeighbors, flags, minFeatureSizeFace, maxFeatureSizeFace);
      Rect[] faceDetectionResults = faceDetections.toArray();
      int faces = faceDetectionResults.length;
      text("detected faces: "+faces,5,15);
      if(faces > 0){
        Rect face = faceDetectionResults[0];
        stroke(0,192,0);
        rect(face.x,face.y,face.width,face.height);
        //detect mouth - only within face rectangle, not the whole frame
        Rect faceLower = face.clone();
        faceLower.height = (int) (face.height * 0.55);
        faceLower.y = face.y + faceLower.height; 
        //FIX: the ROI starts 55% down the face but kept the full 55% height,
        //so it could run past the bottom of the frame and make submat() throw
        //an out-of-range error. Clamp to the image and skip if nothing is left.
        faceLower.height = Math.min(faceLower.height, equalizedImg.rows() - faceLower.y);
        if(faceLower.height > 0){
          //submat grabs a portion of the image (submatrix) = our region of interest (ROI)
          Mat faceROI = equalizedImg.submat(faceLower);
          Mat faceROIThresh = faceROI.clone();
          //threshold - maxval is 255 (the original passed `width` = 320, which
          //saturates to 255 on an 8-bit image anyway)
          Imgproc.threshold(faceROI, faceROIThresh, threshold, 255, Imgproc.THRESH_BINARY_INV);
          Imgproc.erode(faceROIThresh, faceROIThresh, new Mat(), new Point(-1,-1), erodeAmt);
          Imgproc.dilate(faceROIThresh, faceROIThresh, new Mat(), new Point(-1,-1), dilateAmt);
          //find contours
          Mat faceContours = faceROIThresh.clone();
          List<MatOfPoint> contours = new ArrayList<MatOfPoint>();
          Imgproc.findContours(faceContours, contours, new Mat(), Imgproc.RETR_EXTERNAL , Imgproc.CHAIN_APPROX_SIMPLE);
          //draw contours - contour points are in ROI coordinates, so offset by
          //the ROI origin to overlay them on the face (FIX: the original drew
          //them at the window's top-left corner instead)
          for(int i = 0 ; i < contours.size(); i++){
            MatOfPoint contour = contours.get(i);
            Point[] points = contour.toArray();
            stroke(map(i,0,contours.size()-1,32,255),0,0);
            beginShape();
            for(Point p : points){
              vertex(faceLower.x + (float)p.x, faceLower.y + (float)p.y);
            }
            endShape();
          }
    
          //debug view of ROI
          PImage faceImg = createImage(faceLower.width,faceLower.height,RGB);
          opencv.toPImage(faceROIThresh,faceImg);
          image(faceImg,width-faceImg.width,0);
        }
      }
      text("Drag mouseX to control threshold: " + threshold+
          "\nHold 'e' and drag mouseX to control erodeAmt: " + erodeAmt+
          "\nHold 'd' and drag mouseX to control dilateAmt: " + dilateAmt,5,210);
    }
    // Interactive tuning: a plain drag sets the threshold; dragging while
    // holding 'e' or 'd' sets the erode or dilate iteration count instead.
    void mouseDragged(){
      if(!keyPressed){
        threshold = mouseX;
        return;
      }
      if(key == 'e') erodeAmt = (int)map(mouseX,0,width,1,6);
      if(key == 'd') dilateAmt = (int)map(mouseX,0,width,1,10);
    }
    // Called by the Processing video library whenever a new camera frame is ready.
    void captureEvent(Capture c) {
      c.read();
    }
    

    This could be improved a bit by using YCrCb colour space to segment skin better, but overall you notice that there are quite a few variables to get right which doesn't make this a very flexible setup.

    You will get much better results using FaceOSC and reading the values you need in Processing via oscP5. Here is a slightly simplified version of the FaceOSCReceiver Processing example focusing mainly on the mouth:

    import oscP5.*;
    OscP5 oscP5;
    
    // num faces found (updated by the /found OSC callback)
    int found;
    
    // pose (scale and position of the tracked face, from /pose/* messages)
    float poseScale;
    PVector posePosition = new PVector();
    
    
    // gesture (mouth dimensions from /gesture/mouth/* messages)
    float mouthHeight;
    float mouthWidth;
    
    void setup() {
      size(640, 480);
      frameRate(30);
    
      // Listen for FaceOSC on UDP port 8338 and route each OSC address
      // pattern to the matching callback method defined below.
      oscP5 = new OscP5(this, 8338);
      oscP5.plug(this, "found", "/found");
      oscP5.plug(this, "poseScale", "/pose/scale");
      oscP5.plug(this, "posePosition", "/pose/position");
      oscP5.plug(this, "mouthWidthReceived", "/gesture/mouth/width");
      oscP5.plug(this, "mouthHeightReceived", "/gesture/mouth/height");
    }
    
    // Draws the tracked mouth as an ellipse in the face's coordinate frame.
    void draw() {  
      background(255);
      stroke(0);
    
      // Nothing to render until FaceOSC reports at least one face.
      if(found <= 0) return;
    
      // Move into the face's frame of reference, then draw the mouth
      // ellipse scaled up from the gesture width/height values.
      translate(posePosition.x, posePosition.y);
      scale(poseScale);
      noFill();
      ellipse(0, 20, mouthWidth* 3, mouthHeight * 3);
    }
    
    // OSC CALLBACK FUNCTIONS
    
    // OSC callback for /found: number of faces FaceOSC is tracking.
    public void found(int i) {
      found = i;
      println("found: " + i);
    }
    
    // OSC callback for /pose/scale: overall scale of the tracked face.
    public void poseScale(float s) {
      poseScale = s;
      println("scale: " + s);
    }
    
    // OSC callback for /pose/position: screen position of the tracked face.
    public void posePosition(float x, float y) {
      posePosition.set(x, y, 0);
      println("pose position\tX: " + x + " Y: " + y );
    }
    
    // OSC callback for /gesture/mouth/width.
    public void mouthWidthReceived(float w) {
      mouthWidth = w;
      println("mouth Width: " + w);
    }
    
    // OSC callback for /gesture/mouth/height.
    public void mouthHeightReceived(float h) {
      mouthHeight = h;
      println("mouth height: " + h);
    }
    
    
    // all other OSC messages end up here
    // Fallback handler: any OSC message not routed to a plugged callback lands here.
    void oscEvent(OscMessage m) {
      if(!m.isPlugged()) {
        println("UNPLUGGED: " + m);
      }
    }
    

    On OSX you can simply download the compiled FaceOSC app. On other operating systems you may need to setup OpenFrameworks, download ofxFaceTracker and compile FaceOSC yourself.