From 88e4c62b3970fe30de177cf47b357121eb76eef4 Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Sat, 19 Aug 2017 18:42:06 +0530 Subject: [PATCH 01/12] Add files via upload --- unittest/Makefile.am | 62 +++++++++++++++++++++++++++++++++++++ unittest/apiexample_test.cc | 52 +++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 unittest/Makefile.am create mode 100644 unittest/apiexample_test.cc diff --git a/unittest/Makefile.am b/unittest/Makefile.am new file mode 100644 index 0000000000..b486890e27 --- /dev/null +++ b/unittest/Makefile.am @@ -0,0 +1,62 @@ +AUTOMAKE_OPTIONS = subdir-objects + +AM_CPPFLAGS += \ + -DUSE_STD_NAMESPACE -DPANGO_ENABLE_ENGINE \ + -I$(top_srcdir)/ccmain -I$(top_srcdir)/api \ + -I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \ + -I$(top_srcdir)/lstm -I$(top_srcdir)/arch \ + -I$(top_srcdir)/viewer \ + -I$(top_srcdir)/textord -I$(top_srcdir)/dict \ + -I$(top_srcdir)/classify -I$(top_srcdir)/display \ + -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil + +# Build googletest: +check_LTLIBRARIES = libgtest.la libgtest_main.la +libgtest_la_SOURCES = ../googletest/googletest/src/gtest-all.cc +libgtest_la_CPPFLAGS = -I$(top_srcdir)/googletest/googletest/include -I$(top_srcdir)/googletest/googletest -pthread +libgtest_main_la_SOURCES = ../googletest/googletest/src/gtest_main.cc +## libgtest_main_la_LIBADD = libgtest.la + +# Build unittests +GTEST_LIBS = libgtest.la libgtest_main.la +AM_CPPFLAGS += -isystem $(top_srcdir)/googletest/googletest/include + +check_PROGRAMS = \ + apiexample_test \ + tesseracttests \ + matrix_test + +TESTS = $(check_PROGRAMS) + +#List of source files needed to build the executable: + +tesseracttests_SOURCES = ../tests/tesseracttests.cpp +tesseracttests_LDADD = $(GTEST_LIBS) + +matrix_test_SOURCES = matrix_test.cc +matrix_test_LDADD = $(GTEST_LIBS) + +apiexample_test_SOURCES = apiexample_test.cc +#apiexample_test_LDFLAGS = -static +apiexample_test_LDFLAGS = $(OPENCL_LDFLAGS) + +if USING_MULTIPLELIBS 
+apiexample_test_LDADD = \ + $(top_srcdir)/ccutil/libtesseract_ccutil.la \ + $(top_srcdir)/ccstruct/libtesseract_ccstruct.la +else +apiexample_test_LDADD = \ + $(top_srcdir)/api/libtesseract.la +endif + +apiexample_test_LDADD += $(LEPTONICA_LIBS) +apiexample_test_LDADD += $(GTEST_LIBS) + +# for windows +if T_WIN +apiexample_test_LDADD += -lws2_32 +matrix_test_LDADD += -lws2_32 +tesseracttests_LDADD += -lws2_32 + +AM_CPPFLAGS += -I$(top_srcdir)/vs2010/port +endif diff --git a/unittest/apiexample_test.cc b/unittest/apiexample_test.cc new file mode 100644 index 0000000000..a9724b861e --- /dev/null +++ b/unittest/apiexample_test.cc @@ -0,0 +1,52 @@ +/////////////////////////////////////////////////////////////////////// +// File: apiexample.cpp +// Description: Api Example for Tesseract. +// Author: ShreeDevi Kumar +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+/////////////////////////////////////////////////////////////////////// +#include "gtest/gtest.h" +#include "tesseract/baseapi.h" +#include "leptonica/allheaders.h" +#include <iostream> +#include <string> +#include <fstream> +#include <locale> + +TEST(TesseractTest, ApiExample) +{ + const char* imagefile = "../testing/phototest.tif"; + const char* groundtruth = "testfiles/phototest.txt"; + char *outText; + std::locale loc("en_US.UTF-8"); + std::ifstream file(groundtruth); + file.imbue(loc); + std::string gtText((std::istreambuf_iterator<char>(file)), + std::istreambuf_iterator<char>()); + tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); + if (api->Init(NULL, "eng")) { + fprintf(stderr, "Could not initialize tesseract.\n"); + exit(1); + } + Pix *image = pixRead(imagefile); + api->SetImage(image); + api->SetPageSegMode(tesseract::PSM_AUTO_OSD); + outText = api->GetUTF8Text(); + ASSERT_EQ(gtText,outText) << "Phototest.tif with default values OCR does not match ground truth"; + api->End(); + delete [] outText; + pixDestroy(&image); +} + +int main(int argc, char **argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file From 436ad77e44cb52333a819d0c5498bd6e608a8bbc Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Sat, 19 Aug 2017 18:43:10 +0530 Subject: [PATCH 02/12] Create readme.md --- unittest/testfiles/readme.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 unittest/testfiles/readme.md diff --git a/unittest/testfiles/readme.md b/unittest/testfiles/readme.md new file mode 100644 index 0000000000..db49b89d81 --- /dev/null +++ b/unittest/testfiles/readme.md @@ -0,0 +1 @@ +Test files used for unit tests From 7b409a1bfa3a3badd4d3dadbc29e26ae2c7aec54 Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Sat, 19 Aug 2017 18:43:57 +0530 Subject: [PATCH 03/12] unittest testfile --- unittest/testfiles/phototest.txt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 unittest/testfiles/phototest.txt diff --git a/unittest/testfiles/phototest.txt
b/unittest/testfiles/phototest.txt new file mode 100644 index 0000000000..02d3a77cbb --- /dev/null +++ b/unittest/testfiles/phototest.txt @@ -0,0 +1,9 @@ +This is a lot of 12 point text to test the +ocr code and see if it works on all types +of file format. + +The quick brown dog jumped over the +lazy fox. The quick brown dog jumped +over the lazy fox. The quick brown dog +jumped over the lazy fox. The quick +brown dog jumped over the lazy fox. From f3dc156f2212c2cf4f71cf3f750e404e658b89fe Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Sat, 19 Aug 2017 18:46:23 +0530 Subject: [PATCH 04/12] updated version of apiexample_test --- unittest/apiexample_test.cc | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/unittest/apiexample_test.cc b/unittest/apiexample_test.cc index a9724b861e..00b0301707 100644 --- a/unittest/apiexample_test.cc +++ b/unittest/apiexample_test.cc @@ -1,5 +1,5 @@ /////////////////////////////////////////////////////////////////////// -// File: apiexample.cpp +// File: apiexample_test.cc // Description: Api Example for Tesseract. 
// Author: ShreeDevi Kumar // @@ -23,30 +23,36 @@ TEST(TesseractTest, ApiExample) { - const char* imagefile = "../testing/phototest.tif"; - const char* groundtruth = "testfiles/phototest.txt"; char *outText; - std::locale loc("en_US.UTF-8"); - std::ifstream file(groundtruth); - file.imbue(loc); + std::locale loc("en_US.UTF-8"); // You can also use "" for the default system locale + std::ifstream file("testfiles/phototest.txt"); + file.imbue(loc); // Use it for file input std::string gtText((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); + tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI(); + // Initialize tesseract-ocr with English, without specifying tessdata path if (api->Init(NULL, "eng")) { fprintf(stderr, "Could not initialize tesseract.\n"); exit(1); } - Pix *image = pixRead(imagefile); + + // Open input image with leptonica library + Pix *image = pixRead("../testing/phototest.tif"); api->SetImage(image); - api->SetPageSegMode(tesseract::PSM_AUTO_OSD); + // Get OCR result outText = api->GetUTF8Text(); - ASSERT_EQ(gtText,outText) << "Phototest.tif with default values OCR does not match ground truth"; + + ASSERT_EQ(gtText,outText) << "Phototest.tif with default values OCR does not match ground truth"; + + // Destroy used object and release memory api->End(); delete [] outText; pixDestroy(&image); + } int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +} From 267734bc34fda70aab2d0ad7fb322e817006dc74 Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Sat, 19 Aug 2017 19:01:46 +0530 Subject: [PATCH 05/12] Changes needed for adding make check for unittest --- Makefile.am | 2 +- configure.ac | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile.am b/Makefile.am index 258cbdd047..3a1342a9b9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -20,7 +20,7 @@ endif .PHONY: install-langs ScrollView.jar install-jars training SUBDIRS = arch ccutil viewer
cutil opencl ccstruct dict classify wordrec textord lstm -SUBDIRS += ccmain api . tessdata doc +SUBDIRS += ccmain api . tessdata doc unittest EXTRA_DIST = README.md\ aclocal.m4 config configure.ac autogen.sh contrib \ diff --git a/configure.ac b/configure.ac index ab4e30ae5d..ff9fca492a 100644 --- a/configure.ac +++ b/configure.ac @@ -66,7 +66,7 @@ AC_SUBST([GENERIC_VERSION]) # ---------------------------------------- # Do not require README file (we use README.md) -AM_INIT_AUTOMAKE([foreign]) +AM_INIT_AUTOMAKE([foreign subdir-objects]) AC_CONFIG_HEADERS([config_auto.h:config/config.h.in]) AM_MAINTAINER_MODE @@ -501,6 +501,7 @@ AC_CONFIG_FILES([tessdata/Makefile]) AC_CONFIG_FILES([tessdata/configs/Makefile]) AC_CONFIG_FILES([tessdata/tessconfigs/Makefile]) AC_CONFIG_FILES([testing/Makefile]) +AC_CONFIG_FILES([unittest/Makefile]) AC_CONFIG_FILES([java/Makefile]) AC_CONFIG_FILES([java/com/Makefile]) AC_CONFIG_FILES([java/com/google/Makefile]) From 6773e8b909d7409f7434db67da6dff56090a7eda Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Sat, 19 Aug 2017 19:17:08 +0530 Subject: [PATCH 06/12] add blank lines to match OCRed text --- unittest/testfiles/phototest.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unittest/testfiles/phototest.txt b/unittest/testfiles/phototest.txt index 02d3a77cbb..a47dc53210 100644 --- a/unittest/testfiles/phototest.txt +++ b/unittest/testfiles/phototest.txt @@ -7,3 +7,4 @@ lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. 
+ From 3b05b3e28b5de07fff1784733f7218e94adb3f85 Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Sat, 2 Sep 2017 20:00:45 +0530 Subject: [PATCH 07/12] revert blank line at end as not needed after https://github.com/tesseract-ocr/tesseract/commit/8bb5a89d5ac0e0c5e94fc566dff70ee1fffc95d1 --- unittest/testfiles/phototest.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/unittest/testfiles/phototest.txt b/unittest/testfiles/phototest.txt index a47dc53210..02d3a77cbb 100644 --- a/unittest/testfiles/phototest.txt +++ b/unittest/testfiles/phototest.txt @@ -7,4 +7,3 @@ lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. The quick brown dog jumped over the lazy fox. - From 10cf508442497667b26b979f31d6f7f873b218f6 Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Sun, 3 Sep 2017 08:09:18 +0530 Subject: [PATCH 08/12] Changed syntax for AM_CPPFLAGS as per suggestion in https://github.com/tesseract-ocr/tesseract/pull/1088#discussion_r136700733 Not ALL of these are needed currently. 
--- unittest/Makefile.am | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/unittest/Makefile.am b/unittest/Makefile.am index b486890e27..beddae0701 100644 --- a/unittest/Makefile.am +++ b/unittest/Makefile.am @@ -1,14 +1,19 @@ AUTOMAKE_OPTIONS = subdir-objects -AM_CPPFLAGS += \ - -DUSE_STD_NAMESPACE -DPANGO_ENABLE_ENGINE \ - -I$(top_srcdir)/ccmain -I$(top_srcdir)/api \ - -I$(top_srcdir)/ccutil -I$(top_srcdir)/ccstruct \ - -I$(top_srcdir)/lstm -I$(top_srcdir)/arch \ - -I$(top_srcdir)/viewer \ - -I$(top_srcdir)/textord -I$(top_srcdir)/dict \ - -I$(top_srcdir)/classify -I$(top_srcdir)/display \ - -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil +AM_CPPFLAGS += -DUSE_STD_NAMESPACE -DPANGO_ENABLE_ENGINE +AM_CPPFLAGS += -I$(top_srcdir)/api +AM_CPPFLAGS += -I$(top_srcdir)/arch +AM_CPPFLAGS += -I$(top_srcdir)/ccmain +AM_CPPFLAGS += -I$(top_srcdir)/ccstruct +AM_CPPFLAGS += -I$(top_srcdir)/ccutil +AM_CPPFLAGS += -I$(top_srcdir)/classify +AM_CPPFLAGS += -I$(top_srcdir)/cutil +AM_CPPFLAGS += -I$(top_srcdir)/dict +AM_CPPFLAGS += -I$(top_srcdir)/display +AM_CPPFLAGS += -I$(top_srcdir)/lstm +AM_CPPFLAGS += -I$(top_srcdir)/textord +AM_CPPFLAGS += -I$(top_srcdir)/viewer +AM_CPPFLAGS += -I$(top_srcdir)/wordrec # Build googletest: check_LTLIBRARIES = libgtest.la libgtest_main.la From 0ed7317c38f70d1a65bf3d9dd7f2eebacfaae165 Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Mon, 4 Sep 2017 10:42:37 +0530 Subject: [PATCH 09/12] Rename unittest/testfiles/phototest.txt to testing/phototest.txt Add groundtruth in the testing directory --- {unittest/testfiles => testing}/phototest.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {unittest/testfiles => testing}/phototest.txt (100%) diff --git a/unittest/testfiles/phototest.txt b/testing/phototest.txt similarity index 100% rename from unittest/testfiles/phototest.txt rename to testing/phototest.txt From f7284569ff958d3dfdc3806d633a42a0598e3068 Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: 
Mon, 4 Sep 2017 10:44:43 +0530 Subject: [PATCH 10/12] Use groundtruth from testing directory --- unittest/apiexample_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unittest/apiexample_test.cc b/unittest/apiexample_test.cc index 00b0301707..f6e85657e2 100644 --- a/unittest/apiexample_test.cc +++ b/unittest/apiexample_test.cc @@ -25,7 +25,7 @@ TEST(TesseractTest, ApiExample) { char *outText; std::locale loc("en_US.UTF-8"); // You can also use "" for the default system locale - std::ifstream file("testfiles/phototest.txt"); + std::ifstream file("../testing/phototest.txt"); file.imbue(loc); // Use it for file input std::string gtText((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); From e0464a0d3123ba2177050f052a008acbf987a6b4 Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Mon, 4 Sep 2017 10:45:44 +0530 Subject: [PATCH 11/12] Delete readme.md --- unittest/testfiles/readme.md | 1 - 1 file changed, 1 deletion(-) delete mode 100644 unittest/testfiles/readme.md diff --git a/unittest/testfiles/readme.md b/unittest/testfiles/readme.md deleted file mode 100644 index db49b89d81..0000000000 --- a/unittest/testfiles/readme.md +++ /dev/null @@ -1 +0,0 @@ -Test files used for unit tests From 69ef9401ca1bfb9e13332fb0e6ca6de395a7dc87 Mon Sep 17 00:00:00 2001 From: Shreeshrii Date: Mon, 4 Sep 2017 18:31:13 +0530 Subject: [PATCH 12/12] Groundtruth for testing/eurotext.tif --- testing/eurotext.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 testing/eurotext.txt diff --git a/testing/eurotext.txt b/testing/eurotext.txt new file mode 100644 index 0000000000..3602f95309 --- /dev/null +++ b/testing/eurotext.txt @@ -0,0 +1,12 @@ +The (quick) [brown] {fox} jumps! +Over the $43,456.78 #90 dog +& duck/goose, as 12.5% of E-mail +from aspammer@website.com is spam. +Der „schnelle” braune Fuchs springt +über den faulen Hund. Le renard brun +«rapide» saute par-dessus le chien +paresseux.
La volpe marrone rapida +salta sopra il cane pigro. El zorro +marrón rápido salta sobre el perro +perezoso. A raposa marrom rápida +salta sobre o cão preguiçoso.