Search code examples
c++opencvimage-processingtextbounding-box

Extracting text OpenCV


I am trying to find the bounding boxes of text in an image and am currently using this approach:

// calculate the local variances of the grayscale image
Mat t_mean, t_mean_2;
Mat grayF;
outImg_gray.convertTo(grayF, CV_32F);
int winSize = 35;
blur(grayF, t_mean, cv::Size(winSize,winSize));
blur(grayF.mul(grayF), t_mean_2, cv::Size(winSize,winSize));
Mat varMat = t_mean_2 - t_mean.mul(t_mean);
varMat.convertTo(varMat, CV_8U);

// threshold the high variance regions
Mat varMatRegions = varMat > 100;

When given an image like this:

enter image description here

Then when I show varMatRegions I get this image:

enter image description here

As you can see, it partly merges the left block of text with the header of the card. For most cards this method works great, but on busier cards it can cause problems.

The reason it is bad for those contours to connect is that it makes the bounding box of the contour nearly take up the entire card.

Can anyone suggest a different way I can find the text to ensure proper detection of text?

200 points to whoever can find the text in the card above and in these two.

enter image description here enter image description here


Solution

  • You can detect text by finding edge elements that lie close together (inspired by an LPD, i.e. license plate detection, approach):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    #include "opencv2/opencv.hpp"
    
    /**
     * Detects text-like regions by merging dense clusters of edges.
     *
     * Pipeline: grayscale -> Sobel derivative in x -> Otsu binarisation ->
     * morphological closing with a wide, short rectangle -> external contours
     * -> bounding boxes of the contours that pass the size/shape filters.
     *
     * @param img  Input image. Colour input is converted with CV_BGR2GRAY;
     *             single-channel input is used as-is.
     * @return     Bounding rectangles of contours that have more than 100
     *             points and whose box is wider than tall. Empty when img is
     *             empty or nothing qualifies.
     */
    std::vector<cv::Rect> detectLetters(cv::Mat img)
    {
        std::vector<cv::Rect> boundRect;

        // A failed cv::imread yields an empty Mat; cvtColor would throw on it.
        if (img.empty())
            return boundRect;

        // CV_BGR2GRAY rejects single-channel input, so pass grayscale through.
        cv::Mat img_gray;
        if (img.channels() == 1)
            img_gray = img;
        else
            cv::cvtColor(img, img_gray, CV_BGR2GRAY);

        // First derivative in x only (dx = 1, dy = 0) with a 3x3 kernel.
        cv::Mat img_sobel;
        cv::Sobel(img_gray, img_sobel, CV_8U, 1, 0, 3, 1, 0, cv::BORDER_DEFAULT);

        // With the Otsu flag the threshold is chosen automatically, so the
        // explicit 0 is only a placeholder.
        cv::Mat img_threshold;
        cv::threshold(img_sobel, img_threshold, 0, 255, CV_THRESH_OTSU + CV_THRESH_BINARY);

        // The 17x3 closing bridges horizontal gaps between nearby edges so a
        // run of letters becomes a single blob. This is the step that makes
        // the approach work; a larger kernel merges more aggressively.
        const cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3));
        cv::morphologyEx(img_threshold, img_threshold, CV_MOP_CLOSE, element);

        // Outer contours only, keeping every boundary point (these named
        // constants are the values 0 and 1 that were previously hard-coded).
        std::vector<std::vector<cv::Point> > contours;
        cv::findContours(img_threshold, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);

        std::vector<cv::Point> poly;  // reused for every contour
        for (std::size_t i = 0; i < contours.size(); ++i)
        {
            // Every boundary point is stored, so this discards small blobs.
            if (contours[i].size() <= 100)
                continue;

            cv::approxPolyDP(contours[i], poly, 3, true);
            const cv::Rect appRect = cv::boundingRect(poly);

            // Keep only boxes that are wider than they are tall.
            if (appRect.width > appRect.height)
                boundRect.push_back(appRect);
        }
        return boundRect;
    }
    

    Usage:

    int main(int argc,char** argv)
    {
        //Read
        cv::Mat img1=cv::imread("side_1.jpg");
        cv::Mat img2=cv::imread("side_2.jpg");
        //Detect
        std::vector<cv::Rect> letterBBoxes1=detectLetters(img1);
        std::vector<cv::Rect> letterBBoxes2=detectLetters(img2);
        //Display
        for(int i=0; i< letterBBoxes1.size(); i++)
            cv::rectangle(img1,letterBBoxes1[i],cv::Scalar(0,255,0),3,8,0);
        cv::imwrite( "imgOut1.jpg", img1);  
        for(int i=0; i< letterBBoxes2.size(); i++)
            cv::rectangle(img2,letterBBoxes2[i],cv::Scalar(0,255,0),3,8,0);
        cv::imwrite( "imgOut2.jpg", img2);  
        return 0;
    }
    

    Results:

    a. element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) ); imgOut1 imgOut2

    b. element = getStructuringElement(cv::MORPH_RECT, cv::Size(30, 30) ); imgOut1 imgOut2

    Results are similar for the other image mentioned.