Search code examples
c++opencvimage-processingedge-detectioncorner-detection

Implementing Harris Corner detector, by following standard material, it ends up detecting edges


Hi Thanks for your attention, I'm trying to implement Harris Corner Detection Algorithm, and have been following the standard Algorithm given on most souces one of them being Wiki Page I follow all the procedure as per the algorithm, and I use sobel filters for computing gradient gx and gy, but the result ends up being a edge detector ( which ended up surprisingly better than the canny edge detector I implemented :O ) I wonder what I am doing wrong.

Core.h

#ifndef CORE
#define CORE

#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;

#endif

Utils.h

#ifndef UTILS
#define UTILS

#include "Core.h"

static void displayImage(Mat &img, unsigned int time = 0,
                         string title = "frame") {
  imshow(title, img);
  waitKey(time);
}

static bool isValidPoint(Mat &img, int x, int y) {
  int rows = img.rows;
  int cols = img.cols;
  return (x >= 0 and x < rows and y >= 0 and y < cols);
}

static void performDiff(Mat img1, Mat img2) {
  int rows = img1.rows;
  int cols = img2.cols;
  Mat diff(rows, cols, CV_8U);
  bool zero = true;
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      int d = (static_cast<int>(img1.at<u_char>(i, j)) -
               static_cast<int>(img2.at<u_char>(i, j)));
      diff.at<u_char>(i, j) = d;
      if (d != 0)
        zero = false;
    }
  }
  displayImage(diff, 0, "diff image");
}

static int getValIntOf(Mat &img, int x, int y) {
  return static_cast<int>(img.at<u_char>(x, y));
}

#endif

GaussianTransform.h

#ifndef GUASSIAN_TRANSFORM
#define GUASSIAN_TRANSFORM


static vector<vector<double>> getGuassianKernal(int size, double sigma) {
  vector<vector<double>> Kernal(size, vector<double>(size, 0.0));
  double sum = 0.0;
  int center = size / 2;
  for (int i = 0; i < size; i++) {
    for (int j = 0; j < size; j++) {
      int x = i - center;
      int y = j - center;
      Kernal[i][j] = exp(-(x * x + y * y) / (2 * sigma * sigma));
      sum += Kernal[i][j];
    }
  }
  if (sum != 0) {
    for (int i = 0; i < size; i++) {
      for (int j = 0; j < size; j++) {
        Kernal[i][j] /= sum;
      }
    }
  }
  return Kernal;
}


static Mat guassianFilterTransform(Mat &img, int FiltSize = 3,
                                   double Sigma = 1.5) {
  vector<vector<double>> filter = getGuassianKernal(FiltSize, Sigma);
  int trows = img.rows - FiltSize + 1;
  int tcols = img.cols - FiltSize + 1;
  Mat transformed(trows, tcols, CV_8U);
  for (int i = 1; i <= trows - 2; i++) {
    for (int j = 1; j <= tcols - 2; j++) {
      double tval = 0;
      for (int x = -1; x <= 1; x++) {
        for (int y = -1; y <= 1; y++) {
          tval = tval + (filter[x + 1][y + 1] *
                         static_cast<double>(img.at<u_char>(i + x, j + y)));
        }
      }
      transformed.at<u_char>(i, j) = static_cast<u_char>(tval);
    }
  }
  return (transformed);
}

#endif

Harris.cpp

#ifndef HARRIS_CORNER
#define HARRIS_CORNER

#include "../../BasicTransforms/GaussianTransform.h"
#include "../../Utils/Core/Core.h"
#include "../../Utils/Core/Utils.h"

double computeCornerResponseFunc(vector<vector<double>> &StructMat) {
  double det =
      StructMat[0][0] * StructMat[1][1] - StructMat[0][1] * StructMat[1][0];
  double trace = StructMat[0][0] + StructMat[1][1];
  return (det + (0.04) * (trace * trace));
}

Mat detectHarrisCorners(Mat &img) {
  int filtSize = 3;
  vector<vector<int>> filtery, filterx;
  filtery = {{-1, -2, -1}, {0, 0, 0}, {1, 2, 1}};
  filterx = {{-1, 0, 1}, {-2, 0, 2}, {-1, 0, 1}};
  int trows = img.rows - filtSize + 1;
  int tcols = img.cols - filtSize + 1;
  Mat gradMag(trows, tcols, CV_8U);
  Mat gradDir(trows, tcols, CV_64F);
  Mat result(trows, tcols, CV_8U);
  double Thrs = 200;
  vector<vector<double>> gaussianKernal = getGuassianKernal(3, 1.5);
  for (int i = 1; i <= trows - 2; i++) {
    for (int j = 1; j <= tcols - 2; j++) {
      double gradX = 0;
      double gradY = 0;
      for (int x = -1; x <= 1; x++) {
        for (int y = -1; y <= 1; y++) {
          gradX =
              gradX + gaussianKernal[x + 1][y + 1] *
                          (filterx[x + 1][y + 1] *
                           static_cast<double>(img.at<u_char>(i + x, j + y)));
          gradY =
              gradY + gaussianKernal[x + 1][y + 1] *
                          (filtery[x + 1][y + 1] *
                           static_cast<double>(img.at<u_char>(i + x, j + y)));
        }
      }
      vector<vector<double>> strtMtrx(2, vector<double>(2, 0.0));
      strtMtrx[0][0] = gradX * gradX;
      strtMtrx[1][1] = gradY * gradY;
      strtMtrx[0][1] = strtMtrx[1][0] = gradX * gradY;
      double CornerResponseFunc = computeCornerResponseFunc(strtMtrx);
      if (CornerResponseFunc >= Thrs) {
        result.at<u_char>(i, j) = 255;
      }
      double tval = sqrt(gradX * gradX + gradY * gradY);
      if (gradX == 0.00) {
        gradDir.at<double>(i, j) = (gradY >= 0 ? M_PI / 2.0 : -M_PI / 2.0);
      } else {
        gradDir.at<double>(i, j) = atan(gradY / gradX);
      }
      gradDir.at<double>(i, j) *= (180.0 / M_PI);
      gradDir.at<double>(i, j) = abs(gradDir.at<double>(i, j));
      gradMag.at<u_char>(i, j) = static_cast<u_char>(tval);
    }
  }
  for (int i = 1; i < trows - 1; i++) {
    for (int j = 1; j < tcols - 1; j++) {
      double angle = gradDir.at<double>(i, j);
      double neigh1, neigh2;
      if ((angle >= 0 and angle < 22.5) or (angle >= 157.5 && angle < 180)) {
        neigh1 = gradMag.at<u_char>(i, j + 1);
        neigh2 = gradMag.at<u_char>(i, j - 1);
      } else if ((angle >= 22.5 and angle < 67.5)) {
        neigh1 = gradMag.at<u_char>(i - 1, j + 1);
        neigh2 = gradMag.at<u_char>(i + 1, j - 1);
      } else if ((angle >= 67.5 and angle < 112.5)) {
        neigh1 = gradMag.at<u_char>(i - 1, j);
        neigh2 = gradMag.at<u_char>(i + 1, j);
      } else if ((angle >= 112.5 and angle < 157.5)) {
        neigh1 = gradMag.at<u_char>(i - 1, j - 1);
        neigh2 = gradMag.at<u_char>(i + 1, j + 1);
      } else {
        neigh1 = neigh2 = 0;
      }
      if (gradMag.at<u_char>(i, j) >= max(neigh1, neigh2))
        result.at<u_char>(i, j) = result.at<u_char>(i, j);
      else
        result.at<u_char>(i, j) = 0;
    }
  }
  return result;
}

int main() {
  string imPath =
      "/home/panirpal/workspace/Projects/ComputerVision/data/chess2.jpg";
  Mat img = imread(imPath, IMREAD_GRAYSCALE);
  if (!img.empty()) {
    Mat gaussian = guassianFilterTransform(img);
    Mat out = detectHarrisCorners(gaussian);
    displayImage(out);
  } else
    cerr << "image not found! exiting...";
}
#endif

Result : Output

Expected Output : Expected Output

Tried to play around with the magic numbers like threshold value and k value of 0.04 in computeCornerResponseFunc To No use ends up detecting edges.

Edit : Made the suggested Changes, It is able to detect some corners now, But I feel Not quite there yet. Updated code. Although Non max impression isn't performed, but I feel it more of a post processing step on the base algorithm, here I think computing Mat GradXYSqSmooth = guassianFilterTransform(GradXY, 1.3); is Introducing some noise, I wonder if there is something I'm doing wrong here, Or should that noise be filtered out! By using something like median filter?

#include <algorithm>
#include <math.h>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <vector>
using namespace std;
using namespace cv;

void displayImage(Mat &img, unsigned int time = 0, string title = "frame") {
  imshow(title, img);
  waitKey(time);
}

static double getDoubleValOf(Mat &img, int x, int y) {
  return static_cast<double>(img.at<u_char>(x, y));
}

static double square(double x) { return x * x; }

static double computeGaussianFunc(double x, double y, double mu, double sigma) {
  double val =
      exp(-0.5 * ((square((x - mu) / sigma)) + square((y - mu) / sigma))) /
      (2 * M_PI * sigma * sigma);
  return val;
}

static vector<vector<double>> getGuassianKernal(double sigma) {
  int size = 2 * ceil(3 * sigma) + 1;
  vector<vector<double>> Kernal(size, vector<double>(size, 0.0));
  double sum = 0.0;
  int center = size / 2;
  for (int i = 0; i < size; i++) {
    for (int j = 0; j < size; j++) {
      Kernal[i][j] = computeGaussianFunc(i, j, center, sigma);
      sum += Kernal[i][j];
    }
  }
  if (sum != 0) {
    for (int i = 0; i < size; i++) {
      for (int j = 0; j < size; j++) {
        Kernal[i][j] /= sum;
      }
    }
  }
  return Kernal;
}

static Mat guassianFilterTransform(Mat &img, double Sigma) {
  vector<vector<double>> filter = getGuassianKernal(Sigma);
  int FiltSize = filter.size();
  int trows = img.rows - FiltSize + 1;
  int tcols = img.cols - FiltSize + 1;
  // Final output
  Mat transformed(trows, tcols, CV_8U);
  for (int i = 0; i < trows; i++) {
    for (int j = 0; j <tcols; j++) {
      double tval = 0;
      for (int x = 0; x <FiltSize; x++) {
        for (int y = 0; y <FiltSize; y++) {
          tval = tval + (filter[x][y] *
                         static_cast<double>(img.at<u_char>(i + x, j + y)));
          tval = min(tval,255.0);
          tval = max(tval,0.0);
        }
      }
      transformed.at<u_char>(i,j) = static_cast<u_char>(round(tval));
    }
  }
  return (transformed);
}

double computeCornerResponseFunc(vector<vector<double>> &StructMat) {
  double det =
      StructMat[0][0] * StructMat[1][1] - StructMat[0][1] * StructMat[1][0];
  double trace = StructMat[0][0] + StructMat[1][1];
  return (det - (0.04) * (trace * trace));
}


Mat computeStructureTensorMatrixAndCornerResponse(Mat &GradXSqSmooth,
                                                  Mat &GradYSqSmooth,
                                                  Mat &GradXYSqSmooth) {
  int rows = GradXSqSmooth.rows;
  int cols = GradXSqSmooth.cols;
  Mat ResponseMat(rows, cols, CV_8U);
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      vector<vector<double>> STM = {{getDoubleValOf(GradXSqSmooth, i, j),
                                     getDoubleValOf(GradXYSqSmooth, i, j)},
                                    {getDoubleValOf(GradXYSqSmooth, i, j),
                                     getDoubleValOf(GradYSqSmooth, i, j)}};
      double CornerRes = computeCornerResponseFunc(STM);
      if (CornerRes >= 200)
        ResponseMat.at<u_char>(i, j) = 255;
      else
        ResponseMat.at<u_char>(i, j) = 0;
    }
  }
  return ResponseMat;
}

// Try to Optimize for GradX this is not required we can directly compute
// GradXSq.
Mat detectHarrisCornersUpd(Mat &img) {
  int filtSize = 3;
  vector<vector<int>> filtery, filterx;
  filtery = {{-1, -2, -1}, {0, 0, 0}, {1, 2, 1}};
  filterx = {{-1, 0, 1}, {-2, 0, 2}, {-1, 0, 1}};
  int trows = img.rows - filtSize + 1;
  int tcols = img.cols - filtSize + 1;
  Mat GradX(trows, tcols, CV_64F);
  Mat GradY(trows, tcols, CV_64F);
  Mat gradDir(trows, tcols, CV_64F);
  Mat result(trows, tcols, CV_8U);
  double Thrs = 200;
  /*
    1) Derivative Calculation:

    Calculate the horizontal (Ix) and vertical (Iy) derivatives of the grayscale
    image using the Sobel operator: Ix = Sobel(I_gray, dx) Iy = Sobel(I_gray,
    dy) dx and dy are the Sobel operators in the x and y directions,
    respectively.
  */
  for (int i = 1; i <= trows - 2; i++) {
    for (int j = 1; j <= tcols - 2; j++) {
      double gradX = 0;
      double gradY = 0;
      // Compute Sobel Gradients.
      for (int x = -1; x <= 1; x++) {
        for (int y = -1; y <= 1; y++) {
          gradX = gradX + (filterx[x + 1][y + 1] *
                           static_cast<double>(img.at<u_char>(i + x, j + y)));
          gradY = gradY + (filtery[x + 1][y + 1] *
                           static_cast<double>(img.at<u_char>(i + x, j + y)));
        }
      }
      GradX.at<double>(i, j) = gradX;
      GradY.at<double>(i, j) = gradY;
      // GradDir required for non maximum supression
      double tval = sqrt(gradX * gradX + gradY * gradY);
      if (gradX == 0.00) {
        gradDir.at<double>(i, j) = (gradY >= 0 ? M_PI / 2.0 : -M_PI / 2.0);
      } else {
        gradDir.at<double>(i, j) = atan(gradY / gradX);
      }
      gradDir.at<double>(i, j) *= (180.0 / M_PI);
      gradDir.at<double>(i, j) = abs(gradDir.at<double>(i, j));
    }
  }
  /*
    3. Derivative Products:
    Element wise product
    Ix2 = Ix * Ix
    Iy2 = Iy * Iy
    Ixy = Ix * Iy
  */
  Mat GradXSq(trows, tcols, CV_64F);
  Mat GradYSq(trows, tcols, CV_64F);
  Mat GradXY(trows, tcols, CV_64F);
  for (int i = 0; i < trows; i++) {
    for (int j = 0; j < tcols; j++) {
      GradXSq.at<double>(i, j) =
          GradX.at<double>(i, j) * GradX.at<double>(i, j);
      GradYSq.at<double>(i, j) =
          GradY.at<double>(i, j) * GradY.at<double>(i, j);
      GradXY.at<double>(i, j) = GradX.at<double>(i, j) * GradY.at<double>(i, j);

      GradXSq.at<u_char>(i, j) =
          static_cast<u_char>(min(GradXSq.at<double>(i, j), 255.0));
      GradYSq.at<u_char>(i, j) =
          static_cast<u_char>(min(GradYSq.at<double>(i, j), 255.0));
      GradXY.at<u_char>(i, j) =
          static_cast<u_char>(min(GradXY.at<double>(i, j), 255.0));
    }
  }
  /*
    Gaussian Smoothing:

    Apply Gaussian smoothing to each of the derivative products to reduce noise
    sensitivity: Ix2_smooth = GaussianBlur(Ix2, kernel_size) Iy2_smooth =
    GaussianBlur(Iy2, kernel_size) Ixy_smooth = GaussianBlur(Ixy, kernel_size)
  */
  Mat GradXSqSmooth = guassianFilterTransform(GradXSq, 1.3);
  displayImage(GradXSqSmooth);
  Mat GradYSqSmooth = guassianFilterTransform(GradYSq, 1.3);
  displayImage(GradYSqSmooth);
  Mat GradXYSqSmooth = guassianFilterTransform(GradXY, 1.3);
  displayImage(GradXYSqSmooth);
  Mat StructTensorMat = computeStructureTensorMatrixAndCornerResponse(
      GradXSqSmooth, GradYSqSmooth, GradXYSqSmooth);
  return StructTensorMat;
}


int main() {
  string imPath =
      "/home/panirpal/workspace/Projects/ComputerVision/data/images/chess2.jpg";
  Mat img = imread(imPath, IMREAD_GRAYSCALE);
  if (!img.empty()) {
    displayImage(img);
    Mat gaussian = guassianFilterTransform(img,1.3);
    Mat out = detectHarrisCornersUpd(gaussian);
    displayImage(out);
  } else
    cerr << "image not found! exiting...";
  return 0;
}


GradXSqSmooth : XGradSqSmooth Img

GradYSqSmooth : yGradSqSmooth

GradXYSmooth : GradXYSmooth

Output Updated : Output


Solution

  • There are a few issues with your code. The most important one is that you are not applying the smoothing to the squared gradient images.

              gradX =
                  gradX + gaussianKernal[x + 1][y + 1] *
                              (filterx[x + 1][y + 1] *
                               static_cast<double>(img.at<u_char>(i + x, j + y)));
    

    Here you are computing the convolution of the image with a kernel formed by element-wise multiplication of the Sobel kernel and the Gaussian kernel. This just means you're not using the Sobel kernel, but something else entirely that I can't immediately analyze.

    Element-wise multiplication and convolution are two different things, and they don't commute. The structure tensor M (as described in the link you provide) has, as the first element, G*(IxIx). Ix is the result of the Sobel filter. IxIx is the square of that (element-wise multiplication of the result, as you did). But the G* part is a convolution with the Gaussian. This smoothing has to be applied after the element-wise multiplication. This is not the same as (G*Ix)(G*Ix)!


    Here's another issue:

    static_cast<u_char>(tval)
    

    This casts a double tval to 8-bit unsigned integer. tval is guaranteed to be non-negative, but it could very well be larger than 255. If so, the cast will wrap, the value 256 will be written as 0. You have to clamp the value first:

    static_cast<u_char>(std::min(tval,255.0))
    

    This one in performDiff is worse:

          int d = (static_cast<int>(img1.at<u_char>(i, j)) -
                   static_cast<int>(img2.at<u_char>(i, j)));
          diff.at<u_char>(i, j) = d
    

    The result of the subtraction cannot be represented in an 8-bit unsigned integer. Where img2 is larger, diff will have some positive value, for example -1 will be represented as 255. You need to take the absolute difference, and do the same clamping as I showed above.


    The call getGuassianKernal(3, 1.5) does not return a Gaussian. Cutting off a Gaussian with sigma 1.5 to 3 pixels leaves something much more similar to a filter with uniform weights than a Gaussian. I explain this in more detail here. For a sigma of 1.5 we'd typically use a kernel of size 2*ceil(3*1.5)+1, which is 11. At the very minimum you need 2*ceil(2*1.5)+1 = 7 (less precise but faster).