We passed an image containing a single line of text, "Hello World", and Tesseract OCR correctly returned 'Hello World'.
But when we pass an image with multiple lines of text, such as
Hello world
How are you
it doesn't show anything.
Here is our code:
#include "stdafx.h"
#include <iostream>
#include <baseapi.h>
#include <allheaders.h>
#include <fstream>
using namespace std;
int _tmain(int argc, _TCHAR* argv[])
{
tesseract::TessBaseAPI api;
api.Init("", "eng", tesseract::OEM_DEFAULT);
api.SetPageSegMode(static_cast<tesseract::PageSegMode>(7));
api.SetOutputName("out");
cout<<"File name:";
char image[256];
cin>>image;
PIX *pixs = pixRead(image);
STRING text_out;
api.ProcessPages(image, NULL, 0, &text_out);
cout<<text_out.string();
ofstream files;
files.open("out.txt");
files << text_out.string()<<endl;
files.close();
cin>> image;
return 0;
}
Page segmentation mode 7 treats the image as a single text line. Try mode 3 (PSM_AUTO), which is fully automatic page segmentation without OSD (the default).