From dc8143f7f4b1ab239d1f4c6e54b870e1b4d3045d Mon Sep 17 00:00:00 2001
From: Burhan Two
Date: Sun, 17 Jun 2018 16:29:36 +0500
Subject: [PATCH] OCR

---
 C_API.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 OCR.cpp | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 C_API.c
 create mode 100644 OCR.cpp

diff --git a/C_API.c b/C_API.c
new file mode 100644
index 0000000..217202a
--- /dev/null
+++ b/C_API.c
@@ -0,0 +1,48 @@
+/*
+ * Minimal OCR example using the Tesseract C API: reads "russian-text.png",
+ * recognizes it with the "rus" language data and writes the text to stdout.
+ *
+ * NOTE(review): the header names were missing from the submitted patch; they
+ * are restored from the Tesseract C API example -- confirm the include paths
+ * for your installation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <allheaders.h>
+#include <capi.h>
+
+/* Write errstr to stderr and terminate the process with exit status 1. */
+void die(const char *errstr) {
+    fputs(errstr, stderr);
+    exit(1);
+}
+
+int main(int argc, char *argv[]) {
+    TessBaseAPI *handle;
+    PIX *img;
+    char *text;
+
+    if((img = pixRead("russian-text.png")) == NULL)
+        die("Error reading image\n");
+
+    handle = TessBaseAPICreate();
+    if(TessBaseAPIInit3(handle, NULL, "rus") != 0)
+        die("Error initialising tesseract\n");
+
+    TessBaseAPISetImage2(handle, img);
+    if(TessBaseAPIRecognize(handle, NULL) != 0)
+        die("Error in Tesseract recognition\n");
+
+    if((text = TessBaseAPIGetUTF8Text(handle)) == NULL)
+        die("Error getting text\n");
+
+    fputs(text, stdout);
+
+    /* Release everything acquired above. */
+    TessDeleteText(text);
+    TessBaseAPIEnd(handle);
+    TessBaseAPIDelete(handle);
+    pixDestroy(&img);
+
+    return 0;
+}
diff --git a/OCR.cpp b/OCR.cpp
new file mode 100644
index 0000000..ef21b54
--- /dev/null
+++ b/OCR.cpp
@@ -0,0 +1,50 @@
+// Minimal OCR example using the Tesseract C++ API: reads "a.jpg", recognizes
+// it with the "rus" language data and prints the text to stdout.
+//
+// NOTE(review): the header names were missing from the submitted patch; they
+// are restored from the Tesseract C++ API example -- confirm the include
+// paths for your installation.
+#include <stdio.h>
+#include <stdlib.h>
+#include <tesseract/baseapi.h>
+#include <leptonica/allheaders.h>
+
+int main()
+{
+    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
+    // Initialize tesseract-ocr with Russian, without specifying tessdata path
+    if (api->Init(NULL, "rus")) {
+        fprintf(stderr, "Could not initialize tesseract.\n");
+        delete api;
+        exit(1);
+    }
+
+    // Open input image with leptonica library; a NULL result means it failed
+    Pix *image = pixRead("a.jpg");
+    if (image == NULL) {
+        fprintf(stderr, "Could not read image a.jpg.\n");
+        api->End();
+        delete api;
+        exit(1);
+    }
+    api->SetImage(image);
+
+    // Get OCR result; a NULL result must never reach printf's %s
+    char *outText = api->GetUTF8Text();
+    if (outText == NULL) {
+        fprintf(stderr, "Could not get OCR text.\n");
+        api->End();
+        delete api;
+        pixDestroy(&image);
+        exit(1);
+    }
+    printf("OCR output:\n%s", outText);
+
+    // Destroy used objects and release memory
+    api->End();
+    delete api;
+    delete [] outText;
+    pixDestroy(&image);
+
+    return 0;
+}