Clip Clip - 1 year ago 184
C++ Question

Extracting text OpenCV

I am trying to find the bounding boxes of text in an image and am currently using this approach:

// calculate the local variances of the grayscale image
Mat t_mean, t_mean_2;
Mat grayF;
outImg_gray.convertTo(grayF, CV_32F);
int winSize = 35;
blur(grayF, t_mean, cv::Size(winSize,winSize));
blur(grayF.mul(grayF), t_mean_2, cv::Size(winSize,winSize));
Mat varMat = t_mean_2 - t_mean.mul(t_mean);
varMat.convertTo(varMat, CV_8U);

// threshold the high variance regions
Mat varMatRegions = varMat > 100;

When given an image like this:

enter image description here

Then when I show
I get this image:

enter image description here

As you can see it somewhat combines the left block of text with the header of the card, for most cards this method works great but on busier cards it can cause problems.

The reason it is bad for those contours to connect is that it makes the bounding box of the contour nearly take up the entire card.

Can anyone suggest a different way I can find the text to ensure proper detection of text?

200 points to whoever can find the text in the card above the these two.

enter image description here
enter image description here

Answer Source

You can detect text by finding close edge elements (inspired from a LPD):

#include "opencv2/opencv.hpp"

std::vector<cv::Rect> detectLetters(cv::Mat img)
    std::vector<cv::Rect> boundRect;
    cv::Mat img_gray, img_sobel, img_threshold, element;
    cvtColor(img, img_gray, CV_BGR2GRAY);
    cv::Sobel(img_gray, img_sobel, CV_8U, 1, 0, 3, 1, 0, cv::BORDER_DEFAULT);
    cv::threshold(img_sobel, img_threshold, 0, 255, CV_THRESH_OTSU+CV_THRESH_BINARY);
    element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) );
    cv::morphologyEx(img_threshold, img_threshold, CV_MOP_CLOSE, element); //Does the trick
    std::vector< std::vector< cv::Point> > contours;
    cv::findContours(img_threshold, contours, 0, 1); 
    std::vector<std::vector<cv::Point> > contours_poly( contours.size() );
    for( int i = 0; i < contours.size(); i++ )
        if (contours[i].size()>100)
            cv::approxPolyDP( cv::Mat(contours[i]), contours_poly[i], 3, true );
            cv::Rect appRect( boundingRect( cv::Mat(contours_poly[i]) ));
            if (appRect.width>appRect.height) 
    return boundRect;


int main(int argc,char** argv)
    cv::Mat img1=cv::imread("side_1.jpg");
    cv::Mat img2=cv::imread("side_2.jpg");
    std::vector<cv::Rect> letterBBoxes1=detectLetters(img1);
    std::vector<cv::Rect> letterBBoxes2=detectLetters(img2);
    for(int i=0; i< letterBBoxes1.size(); i++)
    cv::imwrite( "imgOut1.jpg", img1);  
    for(int i=0; i< letterBBoxes2.size(); i++)
    cv::imwrite( "imgOut2.jpg", img2);  
    return 0;


a. element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) ); imgOut1 imgOut2

b. element = getStructuringElement(cv::MORPH_RECT, cv::Size(30, 30) ); imgOut1 imgOut2

Results are similar for the other image mentioned.

Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download