From 44717c4a606668c6e91b3f52ebfa530a86766876 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 5 Nov 2016 20:14:47 +0100 Subject: [PATCH 001/132] opencl: Fix wrong implementation of function getNumDeviceWithEmptyScore gcc report: opencl_device_selection.h: In function 'ds_status getNumDeviceWithEmptyScore(ds_profile*, unsigned int*)': opencl_device_selection.h:589:13: warning: value computed is not used [-Wunused-value] *num++; ^ This is caused by a buggy implementation which increases the value of num instead of *num. Signed-off-by: Stefan Weil --- opencl/opencl_device_selection.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl/opencl_device_selection.h b/opencl/opencl_device_selection.h index 74272b35c0..1c8eee8c5c 100644 --- a/opencl/opencl_device_selection.h +++ b/opencl/opencl_device_selection.h @@ -586,7 +586,7 @@ static ds_status getNumDeviceWithEmptyScore(ds_profile* profile, *num=0; for (i = 0; i < profile->numDevices; i++) { if (profile->devices[i].score == NULL) { - *num++; + (*num)++; } } return DS_SUCCESS; From 663ca268cfd445a662b541b335a6fed03d5e7814 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 5 Nov 2016 20:20:07 +0100 Subject: [PATCH 002/132] opencl: Add missing argument for L_WARNING gcc report: In file included from /usr/include/leptonica/alltypes.h:36:0, from /usr/include/leptonica/allheaders.h:34, from openclwrapper.h:2, from openclwrapper.cpp:11: openclwrapper.cpp: In static member function 'static PIX* OpenclDevice::pixReadMemTiffCl(const l_uint8*, size_t, l_int32)': /usr/include/leptonica/environ.h:442:68: warning: format '%d' expects a matching 'int' argument [-Wformat=] (void)fprintf(stderr, "Warning in %s: " a, __VA_ARGS__), \ ^ /usr/include/leptonica/environ.h:427:61: note: in definition of macro 'IF_SEV' ((l) >= MINIMUM_SEVERITY && (l) >= LeptMsgSeverity ? 
(t) : (f)) ^ opencl/openclwrapper.cpp:1162:3: note: in expansion of macro 'L_WARNING' L_WARNING("tiff page %d not found", procName); ^ Signed-off-by: Stefan Weil --- opencl/openclwrapper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index f35fcd439d..0bad61f409 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -1159,7 +1159,7 @@ OpenclDevice::pixReadMemTiffCl(const l_uint8 *data,size_t size,l_int32 n) } if (pagefound == FALSE) { - L_WARNING("tiff page %d not found", procName); + L_WARNING("tiff page %d not found", procName, i); TIFFCleanup(tif); return NULL; } From 5919916bc5ca9dcb18142569c2317a79556431b9 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 7 Nov 2016 09:41:05 +0100 Subject: [PATCH 003/132] opencl: Remove unused function getNumDeviceWithEmptyScore This fixes compiler warnings. Signed-off-by: Stefan Weil --- opencl/opencl_device_selection.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/opencl/opencl_device_selection.h b/opencl/opencl_device_selection.h index 1c8eee8c5c..85083647a3 100644 --- a/opencl/opencl_device_selection.h +++ b/opencl/opencl_device_selection.h @@ -578,19 +578,5 @@ static ds_status readProfileFromFile(ds_profile* profile, return status; } -static ds_status getNumDeviceWithEmptyScore(ds_profile* profile, - unsigned int* num) { - unsigned int i; - if (profile == NULL || num==NULL) - return DS_MEMORY_ERROR; - *num=0; - for (i = 0; i < profile->numDevices; i++) { - if (profile->devices[i].score == NULL) { - (*num)++; - } - } - return DS_SUCCESS; -} - #endif #endif From c4dbc0e0fd4028da3f8098c31e0205bf94a122b3 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 7 Nov 2016 12:31:20 +0100 Subject: [PATCH 004/132] Fix crash caused by undefined value of local variable Commit b1f03cb6975ee4bde6d8c9450ea55f111cc36ac1 added a call of function FreeFeatureSet to fix a memory leak, but introduced a new bug because the local 
variable FloatFeatures was not always assigned a value. Now FloatFeatures is always assigned a value, and we only need a single place where FreeFeatureSet is called. Signed-off-by: Stefan Weil --- classify/adaptmatch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 6e995c2159..38a9cda859 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -818,14 +818,14 @@ int Classify::GetAdaptiveFeatures(TBLOB *Blob, classify_norm_method.set_value(baseline); Features = ExtractPicoFeatures(Blob); + *FloatFeatures = Features; + NumFeatures = Features->NumFeatures; if (NumFeatures > UNLIKELY_NUM_FEAT) { - FreeFeatureSet(Features); return 0; } ComputeIntFeatures(Features, IntFeatures); - *FloatFeatures = Features; return NumFeatures; } /* GetAdaptiveFeatures */ From c5fdba596e117ef61a17983adb8742e4b39e8dcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 11 Nov 2016 22:03:16 +0100 Subject: [PATCH 005/132] allow combination of enable/disable --- configure.ac | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index a775e4fc28..dd1cbba005 100644 --- a/configure.ac +++ b/configure.ac @@ -75,6 +75,7 @@ AM_CONDITIONAL([T_WIN], false) AM_CONDITIONAL([MINGW], false) AM_CONDITIONAL([OSX], false) AM_CONDITIONAL([GRAPHICS_DISABLED], false) +AC_SUBST([AM_CPPFLAGS]) OPENCL_INC="/opt/AMDAPP/include" OPENCL_LIBS="-lOpenCL -ltiff" @@ -154,7 +155,7 @@ AC_ARG_ENABLE([cube], AC_MSG_RESULT([$disable_cube]) AM_CONDITIONAL([NO_CUBE_BUILD], [test "$disable_cube" = "yes"]) if test "$disable_cube" = "yes"; then - AC_SUBST([AM_CPPFLAGS], [-DNO_CUBE_BUILD]) + AM_CPPFLAGS="-DNO_CUBE_BUILD $AM_CPPFLAGS" fi # check whether to build embedded version @@ -166,7 +167,7 @@ AC_ARG_ENABLE([embedded], AC_MSG_RESULT([$enable_embedded]) AM_CONDITIONAL([EMBEDDED], [test "$enable_embedded" = "yes"]) if test "$enable_embedded" = "yes"; then - 
AC_SUBST([AM_CPPFLAGS], [-DEMBEDDED]) + AM_CPPFLAGS="-DEMBEDDED $AM_CPPFLAGS" fi # check whether to build OpenMP support @@ -174,7 +175,7 @@ AM_CONDITIONAL([OPENMP], false) AC_OPENMP AS_IF([test "x$OPENMP_CFLAGS" != "x"], [AM_CONDITIONAL([OPENMP], true) - AC_SUBST([AM_CPPFLAGS], ["$OPENMP_CXXFLAGS"]) + AM_CPPFLAGS="$OPENMP_CXXFLAGS $AM_CPPFLAGS" AC_DEFINE([OPENMP], [], [Defined when compiled with OpenMP support])] ) @@ -227,7 +228,7 @@ case "${host_os}" in if !($have_opencl_lib); then AC_MSG_ERROR([Required OpenCL library not found!]) fi - AC_SUBST([AM_CPPFLAGS], [-DUSE_OPENCL]) + AM_CPPFLAGS="-DUSE_OPENCL $AM_CPPFLAGS" OPENCL_CPPFLAGS="" OPENCL_LDFLAGS="-framework OpenCL" fi @@ -246,7 +247,7 @@ case "${host_os}" in if !($have_tiff); then AC_MSG_ERROR([Required TIFF headers not found! Try to install libtiff-dev?? package.]) fi - AC_SUBST([AM_CPPFLAGS], [-DUSE_OPENCL]) + AM_CPPFLAGS="-DUSE_OPENCL $AM_CPPFLAGS" OPENCL_CPPFLAGS="-I${OPENCL_INC}" OPENCL_LDFLAGS="${OPENCL_LIBS}" fi From 0518bb549fe40584e4874fa4ea84026fd6786174 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 12 Nov 2016 21:52:26 +0100 Subject: [PATCH 006/132] opencl: Fix type of parameter for clGetContextInfo CL_CONTEXT_NUM_DEVICES expects a cl_uint. Passing size_t results in a wrong value for numDevices on hosts where sizeof(cl_uint) != sizeof(size_t). This results in errors like these: Tesseract Open Source OCR Engine v3.05.00dev with Leptonica OpenCL error code is -44 at when clCreateKernel kernel_HistogramRectAllChannels . OpenCL error code is -44 at when clCreateKernel kernel_HistogramRectAllChannelsReduction . OpenCL error code is -48 at when clSetKernelArg imageBuffer . ... 
Signed-off-by: Stefan Weil --- opencl/openclwrapper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index 0bad61f409..4bd91cd286 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -595,7 +595,7 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption ) const char *source; size_t source_size[1]; int b_error, binary_status, binaryExisted, idx; - size_t numDevices; + cl_uint numDevices; cl_device_id *mpArryDevsID; FILE *fd, *fd1; const char* filename = "kernel.cl"; From c097dd3592d1a854091341080a2ce97bbc09ae89 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 12 Nov 2016 21:58:54 +0100 Subject: [PATCH 007/132] opencl: Fix type of parameter for clGetProgramInfo CL_PROGRAM_NUM_DEVICES expects a cl_uint. Passing size_t results in a wrong value for numDevices on hosts where sizeof(cl_uint) != sizeof(size_t). Signed-off-by: Stefan Weil --- opencl/openclwrapper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index 4bd91cd286..68020702e2 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -462,7 +462,8 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c { unsigned int i = 0; cl_int clStatus; - size_t *binarySizes, numDevices=0; + size_t *binarySizes; + cl_uint numDevices; cl_device_id *mpArryDevsID; char **binaries, *str = NULL; From f8cac770a39aa6d48b29737a3e500052fd67b874 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 25 Nov 2016 09:06:58 +0100 Subject: [PATCH 008/132] backport from 4.00: issue #242 - different results when same image is lossless-encoded at different bpp --- ccmain/thresholder.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/ccmain/thresholder.cpp b/ccmain/thresholder.cpp index df6abd01eb..a9a127eb3b 100644 --- a/ccmain/thresholder.cpp +++ 
b/ccmain/thresholder.cpp @@ -152,19 +152,27 @@ void ImageThresholder::SetImage(const Pix* pix) { int depth; pixGetDimensions(src, &image_width_, &image_height_, &depth); // Convert the image as necessary so it is one of binary, plain RGB, or - // 8 bit with no colormap. - if (depth > 1 && depth < 8) { + // 8 bit with no colormap. Guarantee that we always end up with our own copy, + // not just a clone of the input. + if (pixGetColormap(src)) { + Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC); + depth = pixGetDepth(tmp); + if (depth > 1 && depth < 8) { + pix_ = pixConvertTo8(tmp, false); + pixDestroy(&tmp); + } else { + pix_ = tmp; + } + } else if (depth > 1 && depth < 8) { pix_ = pixConvertTo8(src, false); - } else if (pixGetColormap(src)) { - pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC); } else { - pix_ = pixClone(src); + pix_ = pixCopy(NULL, src); } depth = pixGetDepth(pix_); pix_channels_ = depth / 8; pix_wpl_ = pixGetWpl(pix_); scale_ = 1; - estimated_res_ = yres_ = pixGetYRes(src); + estimated_res_ = yres_ = pixGetYRes(pix_); Init(); } From 67fa73c95a751cd2dcc2029207fa7cbfc5097105 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 13 Nov 2016 22:32:13 +0100 Subject: [PATCH 009/132] training: Fix compiler warnings (deprecated register keyword) training/commontraining.cpp:824:3: warning: 'register' storage class specifier is deprecated and incompatible with C++1z [-Wdeprecated-register] ... 
Signed-off-by: Stefan Weil --- training/commontraining.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/training/commontraining.cpp b/training/commontraining.cpp index 1c4cc832bc..df2521f513 100644 --- a/training/commontraining.cpp +++ b/training/commontraining.cpp @@ -821,9 +821,9 @@ CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset, void Normalize ( float *Values) { - register float Slope; - register float Intercept; - register float Normalizer; + float Slope; + float Intercept; + float Normalizer; Slope = tan (Values [2] * 2 * PI); Intercept = Values [1] - Slope * Values [0]; From 07ce98b7fc61ebcca16618c9b9841bfbc6dd2fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Sat, 13 Feb 2016 00:04:59 +0100 Subject: [PATCH 010/132] add option "make training-uninstall" --- .gitignore | 1 + Makefile.am | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 6fa0865df7..1a43bfe9d3 100644 --- a/.gitignore +++ b/.gitignore @@ -52,6 +52,7 @@ training/wordlist2dawg *.patch # ignore compilation files +build/* */.deps/* */.libs/* *.lo diff --git a/Makefile.am b/Makefile.am index a4aa1dd915..8e2dbcf42f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -4,9 +4,12 @@ ACLOCAL_AMFLAGS = -I m4 if ENABLE_TRAINING TRAINING_SUBDIR = training training: + $(MAKE) @cd "$(top_builddir)/training" && $(MAKE) training-install: @cd "$(top_builddir)/training" && $(MAKE) install +training-uninstall: + @cd "$(top_builddir)/training" && $(MAKE) uninstall clean-local: @cd "$(top_builddir)/training" && $(MAKE) clean else From e5a86c47dd7d946e7ca4083ca541f8901233dc70 Mon Sep 17 00:00:00 2001 From: Cristian Ciupitu Date: Tue, 8 Nov 2016 23:20:48 +0200 Subject: [PATCH 011/132] Fix a typo in tesseract(1) man page C++ needs to be escaped as C\+\+ in the AsciiDoc source code. 
--- doc/tesseract.1.asc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc index d6f34d5060..56627a9392 100644 --- a/doc/tesseract.1.asc +++ b/doc/tesseract.1.asc @@ -268,7 +268,7 @@ The engine was developed at Hewlett Packard Laboratories Bristol and at Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more changes made in 1996 to port to Windows, and some C\+\+izing in 1998. A lot of the code was written in C, and then some more was written in C\+\+. -The C\++ code makes heavy use of a list system using macros. This predates +The C\+\+ code makes heavy use of a list system using macros. This predates stl, was portable before stl, and is more efficient than stl lists, but has the big negative that if you do get a segmentation violation, it is hard to debug. From 56dc7d14b9f96b9e7511ff780abae7378d3f6ca7 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 3 Nov 2016 08:21:25 +0100 Subject: [PATCH 012/132] Fix typo in documentation Signed-off-by: Stefan Weil --- INSTALL.GIT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/INSTALL.GIT.md b/INSTALL.GIT.md index 07acbb0b01..31277e15fb 100644 --- a/INSTALL.GIT.md +++ b/INSTALL.GIT.md @@ -24,7 +24,7 @@ So, the steps for making Tesseract are: You need to install at least English language and OSD data files to TESSDATA_PREFIX directory. You can retrieve single file with tools like [wget](https://www.gnu.org/software/wget/), [curl](https://curl.haxx.se/), [GithubDownloader](https://github.com/intezer/GithubDownloader) or browser. 
-All language data files can be retrieved from git repository (usefull only for packagers!): +All language data files can be retrieved from git repository (useful only for packagers!): $ git clone https://github.com/tesseract-ocr/tessdata.git tesseract-ocr.tessdata From b04a91093207adf26e71efaf729048404b707efa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 25 Nov 2016 09:31:22 +0100 Subject: [PATCH 013/132] opencl: Fix typo in name of local variable Signed-off-by: Stefan Weil Conflicts: opencl/openclwrapper.cpp --- opencl/openclwrapper.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index 68020702e2..6ceb9bcf1f 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -3319,20 +3319,20 @@ PERF_COUNT_SUB("readProfileFromFile") // cleanup // TODO: call destructor for profile object? - bool overrided = false; + bool overridden = false; char *overrideDeviceStr = getenv("TESSERACT_OPENCL_DEVICE"); if (overrideDeviceStr != NULL) { int overrideDeviceIdx = atoi(overrideDeviceStr); if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices ) { printf("[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, %i)\n", overrideDeviceStr, overrideDeviceIdx); bestDeviceIdx = overrideDeviceIdx - 1; - overrided = true; + overridden = true; } else { printf("[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are valid devices).\n", overrideDeviceStr, profile->numDevices); } } - if (overrided) { + if (overridden) { printf("[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ? 
"OpenCL" : "Native"); } selectedDevice = profile->devices[bestDeviceIdx]; From b0a2189c25762945ff87626ac1420547bd51ae4c Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 12 Nov 2016 19:21:57 +0100 Subject: [PATCH 014/132] ccutil/ambigs: Optimize tesseract::UnicharIdArrayUtils::compare The compare method is called very often, so even small improvements are important. The new code avoids one comparison in each loop iteration. This results in smaller code (60 bytes for x86_64, gcc). Signed-off-by: Stefan Weil --- ccutil/ambigs.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/ccutil/ambigs.h b/ccutil/ambigs.h index b278f9f39d..02686035e9 100644 --- a/ccutil/ambigs.h +++ b/ccutil/ambigs.h @@ -59,17 +59,19 @@ class UnicharIdArrayUtils { // less than length of array2, if any array1[i] is less than array2[i]. // Returns 0 if the arrays are equal, 1 otherwise. // The function assumes that the arrays are terminated by INVALID_UNICHAR_ID. - static inline int compare(const UNICHAR_ID array1[], - const UNICHAR_ID array2[]) { - const UNICHAR_ID *ptr1 = array1; - const UNICHAR_ID *ptr2 = array2; - while (*ptr1 != INVALID_UNICHAR_ID && *ptr2 != INVALID_UNICHAR_ID) { - if (*ptr1 != *ptr2) return *ptr1 < *ptr2 ? -1 : 1; - ++ptr1; - ++ptr2; + static inline int compare(const UNICHAR_ID *ptr1, + const UNICHAR_ID *ptr2) { + for (;;) { + const UNICHAR_ID val1 = *ptr1++; + const UNICHAR_ID val2 = *ptr2++; + if (val1 != val2) { + if (val1 == INVALID_UNICHAR_ID) return -1; + if (val2 == INVALID_UNICHAR_ID) return 1; + if (val1 < val2) return -1; + return 1; + } + if (val1 == INVALID_UNICHAR_ID) return 0; } - if (*ptr1 == INVALID_UNICHAR_ID && *ptr2 == INVALID_UNICHAR_ID) return 0; - return *ptr1 == INVALID_UNICHAR_ID ? -1 : 1; } // Look uid in the vector of uids. 
If found, the index of the matched From 4f3cc9c795a8443eee7fd732adad789a03992cde Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Tue, 22 Nov 2016 10:41:43 -0800 Subject: [PATCH 015/132] Fixed failed merge of memory leak --- classify/adaptmatch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 38a9cda859..6e995c2159 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -818,14 +818,14 @@ int Classify::GetAdaptiveFeatures(TBLOB *Blob, classify_norm_method.set_value(baseline); Features = ExtractPicoFeatures(Blob); - *FloatFeatures = Features; - NumFeatures = Features->NumFeatures; if (NumFeatures > UNLIKELY_NUM_FEAT) { + FreeFeatureSet(Features); return 0; } ComputeIntFeatures(Features, IntFeatures); + *FloatFeatures = Features; return NumFeatures; } /* GetAdaptiveFeatures */ From 054205f8fd530949e1978755effa493ac1c554bc Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Tue, 22 Nov 2016 18:53:54 +0100 Subject: [PATCH 016/132] Fix compiler warning (-Wmaybe-uninitialized) gcc report: ccstruct/blamer.cpp:343:65: warning: 'truth_x' may be used uninitialized in this function [-Wmaybe-uninitialized] Signed-off-by: Stefan Weil --- ccstruct/blamer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccstruct/blamer.cpp b/ccstruct/blamer.cpp index 5d2837d084..4573e9b3f0 100644 --- a/ccstruct/blamer.cpp +++ b/ccstruct/blamer.cpp @@ -317,7 +317,7 @@ void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) { int num_blobs = word->chopped_word->blobs.size(); int box_index = 0; int blob_index = 0; - inT16 truth_x; + inT16 truth_x = -1; while (box_index < truth_word_.length() && blob_index < num_blobs) { truth_x = norm_truth_word_.BlobBox(box_index).right(); TBLOB * curr_blob = word->chopped_word->blobs[blob_index]; From f8e1c7e2a0e7543256c431e57c1f0872c8271998 Mon Sep 17 00:00:00 2001 From: Amit D Date: Thu, 24 Nov 2016 12:25:13 +0200 Subject: [PATCH 017/132] 
AUTHORS - Add community contributors ... and Jeff --- AUTHORS | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/AUTHORS b/AUTHORS index 4252027d51..0615b38a22 100644 --- a/AUTHORS +++ b/AUTHORS @@ -2,6 +2,7 @@ Ray Smith (lead developer) Ahmad Abdulkader Rika Antonova Nicholas Beato +Jeff Breidenbach Samuel Charron Phil Cheatle Simon Crouch @@ -26,3 +27,14 @@ Joern Wanke Ping Ping Xiu Andrew Ziem Oscar Zuniga + +Community Contributers: +Zdenko Podobný (Maintainer) +Jim Regan (Maintainer) +James R Barlow +Amit Dovev +Martin Ettl +Tom Morris +Egor Pugin +Sundar M. Vaidya +Stefan Weil From e8d5152ab7c76f17d92c9042c67e40bd73697b31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 25 Nov 2016 09:37:57 +0100 Subject: [PATCH 018/132] Increase version number in VS2010 and fix year --- vs2010/include/tesseract_versionnumbers.props | 6 +++--- vs2010/libtesseract/libtesseract.rc | 2 +- vs2010/port/vcsversion.h | 2 +- vs2010/tesseract/tesseract.rc | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/vs2010/include/tesseract_versionnumbers.props b/vs2010/include/tesseract_versionnumbers.props index e16989d0e7..77bb3e03ab 100644 --- a/vs2010/include/tesseract_versionnumbers.props +++ b/vs2010/include/tesseract_versionnumbers.props @@ -4,9 +4,9 @@ - 303 - 3,3,0,0 - 3.03 + 305 + 3,5,0,0 + 3.05 <_ProjectFileVersion>10.0.40219.1 diff --git a/vs2010/libtesseract/libtesseract.rc b/vs2010/libtesseract/libtesseract.rc index f72d17f5a2..a4c5fc4568 100644 --- a/vs2010/libtesseract/libtesseract.rc +++ b/vs2010/libtesseract/libtesseract.rc @@ -72,7 +72,7 @@ BEGIN VALUE "FileDescription", "Tesseract OCR library" VALUE "FileVersion", "3, 5, 0, 0" VALUE "InternalName", "libtesseract" - VALUE "LegalCopyright", "Copyright (C) 2015 Google, Inc. Licensed under the Apache License, Version 2.0" + VALUE "LegalCopyright", "Copyright (C) 2016 Google, Inc. 
Licensed under the Apache License, Version 2.0" VALUE "OriginalFilename", "libtesseract" VALUE "ProductName", "Tesseract OCR Library" VALUE "ProductVersion", "3, 5, 0, 0" diff --git a/vs2010/port/vcsversion.h b/vs2010/port/vcsversion.h index 6d5bed8074..d620733040 100644 --- a/vs2010/port/vcsversion.h +++ b/vs2010/port/vcsversion.h @@ -1,2 +1,2 @@ -#define GIT_REV "3.04.00" +#define GIT_REV "3.05.00dev" diff --git a/vs2010/tesseract/tesseract.rc b/vs2010/tesseract/tesseract.rc index f2fc5248dc..48ae7fc88b 100644 --- a/vs2010/tesseract/tesseract.rc +++ b/vs2010/tesseract/tesseract.rc @@ -72,7 +72,7 @@ BEGIN VALUE "FileDescription", "Tesseract command-line OCR engine" VALUE "FileVersion", "3,5,0,0" VALUE "InternalName", "tesseract" - VALUE "LegalCopyright", "Copyright (C) 2015 Google, Inc. Licensed under the Apache License, Version 2.0" + VALUE "LegalCopyright", "Copyright (C) 2016 Google, Inc. Licensed under the Apache License, Version 2.0" VALUE "OriginalFilename", "tesseract.exe" VALUE "ProductName", "Tesseract-OCR" VALUE "ProductVersion", "3.05.00dev" From 057b932f023ba6baf1cf0da3a0bc87946215c5d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 25 Nov 2016 13:13:28 +0100 Subject: [PATCH 019/132] backport from 4.00: add missing License information --- LICENSE | 202 ++++++++++++++++++ ccmain/cube_control.cpp | 10 + ccstruct/hpdsizes.h | 9 + ccutil/ccutil.cpp | 9 + neural_networks/runtime/input_file_buffer.cpp | 9 + neural_networks/runtime/input_file_buffer.h | 9 + neural_networks/runtime/neural_net.cpp | 9 + neural_networks/runtime/neural_net.h | 9 + neural_networks/runtime/neuron.cpp | 9 + neural_networks/runtime/neuron.h | 9 + neural_networks/runtime/sigmoid_table.cpp | 9 + opencl/oclkernels.h | 16 +- opencl/opencl_device_selection.h | 15 +- opencl/openclwrapper.cpp | 9 + opencl/openclwrapper.h | 9 + testing/reorgdata.sh | 9 + textord/devanagari_processing.h | 13 +- textord/gap_map.cpp | 9 + textord/gap_map.h | 9 + textord/tospace.cpp | 
9 + training/commandlineflags.cpp | 9 + training/language-specific.sh | 10 + training/set_unicharset_properties.cpp | 10 + viewer/svpaint.cpp | 9 + 24 files changed, 421 insertions(+), 8 deletions(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ccmain/cube_control.cpp b/ccmain/cube_control.cpp index 1430debc5b..b982289db9 100644 --- a/ccmain/cube_control.cpp +++ b/ccmain/cube_control.cpp @@ -5,6 +5,16 @@ * Author: Raquel Romano * Created: September 2009 * + * (C) Copyright 2009, Google Inc. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. **********************************************************************/ // Include automatically generated configuration file if running autoconf. diff --git a/ccstruct/hpdsizes.h b/ccstruct/hpdsizes.h index 2670e21b07..f4d886a0b4 100644 --- a/ccstruct/hpdsizes.h +++ b/ccstruct/hpdsizes.h @@ -1,3 +1,12 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #ifndef HPDSIZES_H #define HPDSIZES_H diff --git a/ccutil/ccutil.cpp b/ccutil/ccutil.cpp index ecf2cb5e36..8f965bb8c1 100644 --- a/ccutil/ccutil.cpp +++ b/ccutil/ccutil.cpp @@ -1,5 +1,14 @@ // Copyright 2008 Google Inc. All Rights Reserved. // Author: scharron@google.com (Samuel Charron) +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include "ccutil.h" diff --git a/neural_networks/runtime/input_file_buffer.cpp b/neural_networks/runtime/input_file_buffer.cpp index c3ca67b604..0d88bec3a7 100644 --- a/neural_networks/runtime/input_file_buffer.cpp +++ b/neural_networks/runtime/input_file_buffer.cpp @@ -4,6 +4,15 @@ // // input_file_buffer.h: Declarations of a class for an object that // represents an input file buffer. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include #include "input_file_buffer.h" diff --git a/neural_networks/runtime/input_file_buffer.h b/neural_networks/runtime/input_file_buffer.h index 5aa7465c41..51110c4753 100644 --- a/neural_networks/runtime/input_file_buffer.h +++ b/neural_networks/runtime/input_file_buffer.h @@ -5,6 +5,15 @@ // input_file_buffer.h: Declarations of a class for an object that // represents an input file buffer. // +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #ifndef INPUT_FILE_BUFFER_H #define INPUT_FILE_BUFFER_H diff --git a/neural_networks/runtime/neural_net.cpp b/neural_networks/runtime/neural_net.cpp index fd2c65af45..d0d359bd53 100644 --- a/neural_networks/runtime/neural_net.cpp +++ b/neural_networks/runtime/neural_net.cpp @@ -4,6 +4,15 @@ // // neural_net.cpp: Declarations of a class for an object that // represents an arbitrary network of neurons +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. // #include #include diff --git a/neural_networks/runtime/neural_net.h b/neural_networks/runtime/neural_net.h index 91d0d68a24..081a042804 100644 --- a/neural_networks/runtime/neural_net.h +++ b/neural_networks/runtime/neural_net.h @@ -5,6 +5,15 @@ // neural_net.h: Declarations of a class for an object that // represents an arbitrary network of neurons // +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #ifndef NEURAL_NET_H #define NEURAL_NET_H diff --git a/neural_networks/runtime/neuron.cpp b/neural_networks/runtime/neuron.cpp index 36309082d9..f1ff7b2316 100644 --- a/neural_networks/runtime/neuron.cpp +++ b/neural_networks/runtime/neuron.cpp @@ -4,6 +4,15 @@ // // neuron.cpp: The implementation of a class for an object // that represents a single neuron in a neural network +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include "neuron.h" #include "input_file_buffer.h" diff --git a/neural_networks/runtime/neuron.h b/neural_networks/runtime/neuron.h index a13d4a2eb7..8021902f3f 100644 --- a/neural_networks/runtime/neuron.h +++ b/neural_networks/runtime/neuron.h @@ -5,6 +5,15 @@ // neuron.h: Declarations of a class for an object that // represents a single neuron in a neural network // +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #ifndef NEURON_H #define NEURON_H diff --git a/neural_networks/runtime/sigmoid_table.cpp b/neural_networks/runtime/sigmoid_table.cpp index f170a10844..0be73b6f8f 100644 --- a/neural_networks/runtime/sigmoid_table.cpp +++ b/neural_networks/runtime/sigmoid_table.cpp @@ -3,6 +3,15 @@ // Author: ahmadab@google.com (Ahmad Abdulkader) // // sigmoid_table.cpp: Sigmoid function lookup table +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include "neuron.h" diff --git a/opencl/oclkernels.h b/opencl/oclkernels.h index b3a8316b12..f3912e1978 100644 --- a/opencl/oclkernels.h +++ b/opencl/oclkernels.h @@ -1,4 +1,12 @@ - +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#ifndef _OCL_KERNEL_H_ #define _OCL_KERNEL_H_ #ifndef USE_EXTERNAL_KERNEL @@ -1092,7 +1100,8 @@ void kernel_ThresholdRectToPix( for ( int c = 0; c < NUM_CHANNELS; c++) { unsigned char pixChan = pixels.s[p*NUM_CHANNELS + c]; if (pHi_Values[c] >= 0 && (pixChan > pThresholds[c]) == (pHi_Values[c] == 0)) { - word |= (((uint)0x80000000) >> ((b*PIXELS_PER_BURST+p)&31)); + const uint kTopBit = 0x80000000; + word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31)); } } } @@ -1157,7 +1166,8 @@ void kernel_ThresholdRectToPix_OneChan( \n#endif\n unsigned char pixChan = pixels.s[idx]; if (pHi_Values[0] >= 0 && (pixChan > pThresholds[0]) == (pHi_Values[0] == 0)) { - word |= (0x80000000 >> ((b*PIXELS_PER_BURST+p)&31)); + const uint kTopBit = 0x80000000; + word |= (kTopBit >> ((b*PIXELS_PER_BURST+p)&31)); } } } diff --git a/opencl/opencl_device_selection.h b/opencl/opencl_device_selection.h index 85083647a3..238250cb70 100644 --- a/opencl/opencl_device_selection.h +++ b/opencl/opencl_device_selection.h @@ -1,3 +1,12 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#ifdef USE_OPENCL #ifndef DEVICE_SELECTION_H #define DEVICE_SELECTION_H @@ -63,8 +72,10 @@ static ds_status releaseDSProfile(ds_profile* profile, ds_score_release sr) { if (profile->devices!=NULL && sr!=NULL) { unsigned int i; for (i = 0; i < profile->numDevices; i++) { - if (profile->devices[i].oclDeviceName) free(profile->devices[i].oclDeviceName); - if (profile->devices[i].oclDriverVersion) free(profile->devices[i].oclDriverVersion); + if (profile->devices[i].oclDeviceName) + free(profile->devices[i].oclDeviceName); + if (profile->devices[i].oclDriverVersion) + free(profile->devices[i].oclDriverVersion); status = sr(profile->devices[i].score); if (status != DS_SUCCESS) break; diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index 6ceb9bcf1f..077b7b215f 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -1,3 +1,12 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #ifdef _WIN32 #include #include diff --git a/opencl/openclwrapper.h b/opencl/openclwrapper.h index ae52a80155..f7e9ad7891 100644 --- a/opencl/openclwrapper.h +++ b/opencl/openclwrapper.h @@ -1,3 +1,12 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include #include "allheaders.h" #include "pix.h" diff --git a/testing/reorgdata.sh b/testing/reorgdata.sh index 141de4a6f4..8cee64ff80 100755 --- a/testing/reorgdata.sh +++ b/testing/reorgdata.sh @@ -1,4 +1,13 @@ #!/bin/bash +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. if [ $# -ne 1 ] then diff --git a/textord/devanagari_processing.h b/textord/devanagari_processing.h index 990a5dfe39..0d070decd0 100644 --- a/textord/devanagari_processing.h +++ b/textord/devanagari_processing.h @@ -1,5 +1,14 @@ // Copyright 2008 Google Inc. All Rights Reserved. // Author: shobhitsaxena@google.com (Shobhit Saxena) +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ @@ -41,9 +50,7 @@ class PixelHistogram { length_ = 0; } - int* hist() const { - return hist_; - } + int* hist() const { return hist_; } int length() const { return length_; diff --git a/textord/gap_map.cpp b/textord/gap_map.cpp index 2f8440e601..421208784d 100644 --- a/textord/gap_map.cpp +++ b/textord/gap_map.cpp @@ -1,3 +1,12 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include "statistc.h" #include "gap_map.h" diff --git a/textord/gap_map.h b/textord/gap_map.h index 914e8dbdea..227db3646b 100644 --- a/textord/gap_map.h +++ b/textord/gap_map.h @@ -1,3 +1,12 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
#ifndef GAP_MAP_H #define GAP_MAP_H diff --git a/textord/tospace.cpp b/textord/tospace.cpp index bec346ef52..025634e1af 100644 --- a/textord/tospace.cpp +++ b/textord/tospace.cpp @@ -1,3 +1,12 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. /********************************************************************** * tospace.cpp * diff --git a/training/commandlineflags.cpp b/training/commandlineflags.cpp index 06bfbe6589..d8cb371828 100644 --- a/training/commandlineflags.cpp +++ b/training/commandlineflags.cpp @@ -1,3 +1,12 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #include "commandlineflags.h" #ifdef USE_STD_NAMESPACE diff --git a/training/language-specific.sh b/training/language-specific.sh index a62f1e3cf3..5f624c6437 100755 --- a/training/language-specific.sh +++ b/training/language-specific.sh @@ -1,4 +1,14 @@ # +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# # Set some language specific variables. Works in conjunction with # tesstrain.sh # diff --git a/training/set_unicharset_properties.cpp b/training/set_unicharset_properties.cpp index 00844ecb56..691c6dcfa6 100644 --- a/training/set_unicharset_properties.cpp +++ b/training/set_unicharset_properties.cpp @@ -1,3 +1,13 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + // This program reads a unicharset file, puts the result in a UNICHARSET // object, fills it with properties about the unichars it contains and writes // the result back to a file. diff --git a/viewer/svpaint.cpp b/viewer/svpaint.cpp index 4d2f49d951..c267257bac 100644 --- a/viewer/svpaint.cpp +++ b/viewer/svpaint.cpp @@ -1,6 +1,15 @@ // Copyright 2007 Google Inc. All Rights Reserved. // // Author: Joern Wanke +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. // // Simple drawing program to illustrate ScrollView capabilities. // From e9785a00181f9946a87c5aabcea41d3f7bc9a396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 25 Nov 2016 13:17:23 +0100 Subject: [PATCH 020/132] downgrade to leptonica 1.73 --- cppan.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cppan.yml b/cppan.yml index 8065da79e4..98738d91ed 100644 --- a/cppan.yml +++ b/cppan.yml @@ -121,4 +121,4 @@ dependencies: private: # tesseract uses leptonica only internally # and does not expose its interface to users - pvt.cppan.demo.leptonica: master + pvt.cppan.demo.leptonica: 1.73 From e5743fd647fc8844a9a77f494d2671aaff00a272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 25 Nov 2016 13:43:57 +0100 Subject: [PATCH 021/132] backport style changes from 4.00 for better identification of fixes and new code --- api/baseapi.cpp | 117 +- api/baseapi.h | 14 +- api/pdfrenderer.cpp | 13 +- api/renderer.cpp | 29 +- api/renderer.h | 20 +- api/tesseractmain.cpp | 162 ++- ccmain/control.cpp | 17 +- ccmain/docqual.cpp | 16 +- ccmain/fixspace.cpp | 5 +- ccmain/ltrresultiterator.cpp | 11 +- ccmain/ltrresultiterator.h | 3 +- ccmain/output.cpp | 28 +- ccmain/pagesegmain.cpp | 7 +- ccmain/paramsd.cpp | 10 +- ccmain/reject.cpp | 4 +- ccstruct/blobbox.cpp | 4 +- ccstruct/boxread.cpp | 3 +- ccstruct/boxword.h | 4 +- ccstruct/coutln.cpp | 66 +- ccstruct/matrix.h | 4 +- ccstruct/mod128.cpp | 4 +- ccstruct/mod128.h | 4 +- ccstruct/pageres.h | 6 +- ccstruct/pdblock.h | 150 +-- ccstruct/polyaprx.cpp | 
2 +- ccstruct/polyblk.cpp | 2 +- ccstruct/quspline.cpp | 4 +- ccstruct/ratngs.h | 3 +- ccstruct/rect.cpp | 12 +- ccstruct/rect.h | 10 +- ccstruct/rejctmap.h | 61 +- ccstruct/statistc.cpp | 5 +- ccutil/clst.cpp | 29 +- ccutil/clst.h | 288 ++--- ccutil/elst.cpp | 26 +- ccutil/elst.h | 62 +- ccutil/elst2.cpp | 26 +- ccutil/elst2.h | 321 +++-- ccutil/errcode.h | 9 +- ccutil/lsterr.h | 2 +- ccutil/mainblk.cpp | 2 +- ccutil/ocrclass.h | 39 +- ccutil/params.cpp | 3 +- ccutil/strngs.cpp | 4 +- ccutil/tessdatamanager.h | 70 +- ccutil/unicharset.h | 3 +- classify/classify.cpp | 4 +- classify/cluster.cpp | 477 ++++--- classify/clusttool.cpp | 43 +- classify/clusttool.h | 22 +- classify/cutoffs.cpp | 10 +- classify/featdefs.cpp | 14 +- classify/fpoint.cpp | 11 +- classify/intmatcher.cpp | 26 +- classify/intmatcher.h | 10 +- classify/intproto.cpp | 18 +- classify/kdtree.cpp | 28 +- classify/mf.cpp | 12 +- classify/mfdefs.cpp | 16 +- classify/mfoutline.cpp | 5 +- classify/mfx.cpp | 4 +- classify/mfx.h | 10 +- classify/normfeat.cpp | 11 +- classify/normmatch.cpp | 18 +- classify/ocrfeatures.cpp | 33 +- classify/outfeat.cpp | 14 +- classify/picofeat.cpp | 12 +- cutil/bitvec.h | 42 +- cutil/danerror.cpp | 10 +- cutil/efio.cpp | 12 +- cutil/emalloc.cpp | 2 +- dict/context.cpp | 38 +- dict/stopper.cpp | 11 +- dict/stopper.h | 10 +- opencl/openclwrapper.cpp | 1940 +++++++++++++---------------- opencl/openclwrapper.h | 97 +- tessdata/configs/box.train.stderr | 8 +- textord/blkocc.h | 20 +- textord/drawedg.h | 7 +- textord/fpchop.cpp | 5 +- textord/makerow.cpp | 8 +- textord/oldbasel.cpp | 79 +- textord/pitsync1.h | 6 +- textord/tabvector.cpp | 12 +- textord/textlineprojection.cpp | 2 +- textord/textord.cpp | 122 +- textord/topitch.cpp | 6 +- textord/tovars.cpp | 10 +- textord/tovars.h | 10 +- training/classifier_tester.cpp | 6 +- training/cntraining.cpp | 21 +- training/commandlineflags.cpp | 24 +- training/commontraining.cpp | 119 +- training/stringrenderer.h | 17 +- 
training/text2image.cpp | 9 +- viewer/scrollview.cpp | 2 +- viewer/svutil.h | 12 + wordrec/lm_state.h | 14 +- wordrec/measure.h | 42 +- wordrec/pieces.cpp | 1 - 100 files changed, 2377 insertions(+), 2829 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index a0689978e5..55c7892c6e 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -809,9 +809,7 @@ int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks, * has not been subjected to a call of Init, SetImage, Recognize, Clear, End * DetectOS, or anything else that changes the internal PAGE_RES. */ -PageIterator* TessBaseAPI::AnalyseLayout() { - return AnalyseLayout(false); -} +PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); } PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { if (FindLines() == 0) { @@ -948,9 +946,7 @@ void TessBaseAPI::SetInputImage(Pix *pix) { input_image_ = pixCopy(NULL, pix); } -Pix* TessBaseAPI::GetInputImage() { - return input_image_; -} +Pix* TessBaseAPI::GetInputImage() { return input_image_; } const char * TessBaseAPI::GetInputName() { if (input_file_) @@ -1379,8 +1375,9 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it, hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); } -static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int num2) { - const unsigned long BUFSIZE = 64; +static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, + int num2) { + const size_t BUFSIZE = 64; char id_buffer[BUFSIZE]; if (num2 >= 0) { snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2); @@ -1393,8 +1390,7 @@ static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int *hocr_str += "'"; } -static void AddBoxTohOCR(const ResultIterator *it, - PageIteratorLevel level, +static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level, STRING* hocr_str) { int left, top, right, bottom; it->BoundingBox(level, &left, &top, &right, &bottom); @@ -1410,7 
+1406,7 @@ static void AddBoxTohOCR(const ResultIterator *it, // add custom height measures float row_height, descenders, ascenders; // row attributes it->RowAttributes(&row_height, &descenders, &ascenders); - // TODO: Do we want to limit these to a single decimal place? + // TODO(rays): Do we want to limit these to a single decimal place? hocr_str->add_str_double("; x_size ", row_height); hocr_str->add_str_double("; x_descenders ", descenders * -1); hocr_str->add_str_double("; x_ascenders ", ascenders); @@ -1418,9 +1414,8 @@ static void AddBoxTohOCR(const ResultIterator *it, *hocr_str += "\">"; } -static void AddBoxToTSV(const PageIterator *it, - PageIteratorLevel level, - STRING* hocr_str) { +static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, + STRING* hocr_str) { int left, top, right, bottom; it->BoundingBox(level, &left, &top, &right, &bottom); hocr_str->add_str_int("\t", left); @@ -1429,8 +1424,6 @@ static void AddBoxToTSV(const PageIterator *it, hocr_str->add_str_int("\t", bottom - top); } - - /** * Make a HTML-formatted string with hOCR markup from the internal * data structures. @@ -1440,7 +1433,7 @@ static void AddBoxToTSV(const PageIterator *it, * STL removed from original patch submission and refactored by rays. */ char* TessBaseAPI::GetHOCRText(int page_number) { - return GetHOCRText(NULL,page_number); + return GetHOCRText(NULL, page_number); } /** @@ -1452,13 +1445,12 @@ char* TessBaseAPI::GetHOCRText(int page_number) { * STL removed from original patch submission and refactored by rays. */ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { - if (tesseract_ == NULL || - (page_res_ == NULL && Recognize(monitor) < 0)) + if (tesseract_ == NULL || (page_res_ == NULL && Recognize(monitor) < 0)) return NULL; int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; int page_id = page_number + 1; // hOCR uses 1-based page numbers. 
- bool para_is_ltr = true; // Default direction is LTR + bool para_is_ltr = true; // Default direction is LTR const char* paragraph_lang = NULL; bool font_info = false; GetBoolVariable("hocr_font_info", &font_info); @@ -1470,13 +1462,13 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { #ifdef _WIN32 // convert input name from ANSI encoding to utf-8 - int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, - NULL, 0); + int str16_len = + MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, NULL, 0); wchar_t *uni16_str = new WCHAR[str16_len]; str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, uni16_str, str16_len); - int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, - 0, NULL, NULL); + int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, 0, + NULL, NULL); char *utf8_str = new char[utf8_len]; WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, NULL, NULL); @@ -1509,7 +1501,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { // Open any new block/paragraph/textline. if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - para_is_ltr = true; // reset to default direction + para_is_ltr = true; // reset to default direction hocr_str += "
WordRecognitionLanguage(); if (paragraph_lang) { - hocr_str += " lang='"; - hocr_str += paragraph_lang; - hocr_str += "'"; + hocr_str += " lang='"; + hocr_str += paragraph_lang; + hocr_str += "'"; } AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); } @@ -1600,7 +1592,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { if (last_word_in_para) { hocr_str += "\n

\n"; pcnt++; - para_is_ltr = true; // back to default direction + para_is_ltr = true; // back to default direction } if (last_word_in_block) { hocr_str += "
\n"; @@ -1620,8 +1612,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { * page_number is 0-based but will appear in the output as 1-based. */ char* TessBaseAPI::GetTSVText(int page_number) { - if (tesseract_ == NULL || - (page_res_ == NULL && Recognize(NULL) < 0)) + if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0)) return NULL; int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; @@ -1629,9 +1620,10 @@ char* TessBaseAPI::GetTSVText(int page_number) { STRING tsv_str(""); - int page_num = page_id, block_num = 0, par_num = 0, line_num = 0, word_num = 0; + int page_num = page_id, block_num = 0, par_num = 0, line_num = 0, + word_num = 0; - tsv_str.add_str_int("1\t", page_num); // level 1 - page + tsv_str.add_str_int("1\t", page_num); // level 1 - page tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); @@ -1642,7 +1634,7 @@ char* TessBaseAPI::GetTSVText(int page_number) { tsv_str.add_str_int("\t", rect_height_); tsv_str += "\t-1\t\n"; - ResultIterator *res_it = GetIterator(); + ResultIterator* res_it = GetIterator(); while (!res_it->Empty(RIL_BLOCK)) { if (res_it->Empty(RIL_WORD)) { res_it->Next(RIL_WORD); @@ -1652,46 +1644,46 @@ char* TessBaseAPI::GetTSVText(int page_number) { // Add rows for any new block/paragraph/textline. 
if (res_it->IsAtBeginningOf(RIL_BLOCK)) { block_num++, par_num = 0, line_num = 0, word_num = 0; - tsv_str.add_str_int("2\t", page_num); // level 2 - block + tsv_str.add_str_int("2\t", page_num); // level 2 - block tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", word_num); AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for block + tsv_str += "\t-1\t\n"; // end of row for block } if (res_it->IsAtBeginningOf(RIL_PARA)) { par_num++, line_num = 0, word_num = 0; - tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph + tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", word_num); AddBoxToTSV(res_it, RIL_PARA, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for para + tsv_str += "\t-1\t\n"; // end of row for para } if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { line_num++, word_num = 0; - tsv_str.add_str_int("4\t", page_num); // level 4 - line + tsv_str.add_str_int("4\t", page_num); // level 4 - line tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", word_num); AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for line + tsv_str += "\t-1\t\n"; // end of row for line } // Now, process the word... 
int left, top, right, bottom; bool bold, italic, underlined, monospace, serif, smallcaps; int pointsize, font_id; - const char *font_name; + const char* font_name; res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, - &monospace, &serif, &smallcaps, - &pointsize, &font_id); + font_name = + res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, + &serif, &smallcaps, &pointsize, &font_id); word_num++; - tsv_str.add_str_int("5\t", page_num); // level 5 - word + tsv_str.add_str_int("5\t", page_num); // level 5 - word tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); @@ -1712,11 +1704,11 @@ char* TessBaseAPI::GetTSVText(int page_number) { tsv_str += res_it->GetUTF8Text(RIL_SYMBOL); res_it->Next(RIL_SYMBOL); } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - tsv_str += "\n"; // end of row + tsv_str += "\n"; // end of row wcnt++; } - char *ret = new char[tsv_str.length() + 1]; + char* ret = new char[tsv_str.length() + 1]; strcpy(ret, tsv_str.string()); delete res_it; return ret; @@ -1907,17 +1899,17 @@ char* TessBaseAPI::GetUNLVText() { return result; } - /** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * page_number is a 0-based page index that will appear in the osd file. - */ +/** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * page_number is a 0-based page index that will appear in the osd file. 
+ */ char* TessBaseAPI::GetOsdText(int page_number) { OSResults osr; bool osd = DetectOS(&osr); if (!osd) { - return NULL; + return NULL; } int orient_id = osr.best_result.orientation_id; @@ -1931,19 +1923,18 @@ char* TessBaseAPI::GetOsdText(int page_number) { int orient_deg = orient_id * 90; // clockwise rotation needed to make the page upright - int rotate = OrientationIdToValue(orient_id); + int rotate = OrientationIdToValue(orient_id); char* osd_buf = new char[255]; snprintf(osd_buf, 255, - "Page number: %d\n" - "Orientation in degrees: %d\n" - "Rotate: %d\n" - "Orientation confidence: %.2f\n" - "Script: %s\n" - "Script confidence: %.2f\n", - page_number, - orient_deg, rotate, orient_conf, - script_name, script_conf); + "Page number: %d\n" + "Orientation in degrees: %d\n" + "Rotate: %d\n" + "Orientation confidence: %.2f\n" + "Script: %s\n" + "Script confidence: %.2f\n", + page_number, orient_deg, rotate, orient_conf, script_name, + script_conf); return osd_buf; } diff --git a/api/baseapi.h b/api/baseapi.h index 3b0d3f67ce..d872689eec 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -588,8 +588,8 @@ class TESS_API TessBaseAPI { * data structures. * page_number is 0-based but will appear in the output as 1-based. 
* monitor can be used to - * cancel the recognition - * receive progress callbacks + * cancel the recognition + * receive progress callbacks */ char* GetHOCRText(ETEXT_DESC* monitor, int page_number); @@ -750,13 +750,9 @@ class TESS_API TessBaseAPI { */ static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); - Tesseract* tesseract() const { - return tesseract_; - } + Tesseract* tesseract() const { return tesseract_; } - OcrEngineMode oem() const { - return last_oem_requested_; - } + OcrEngineMode oem() const { return last_oem_requested_; } void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } @@ -898,7 +894,7 @@ class TESS_API TessBaseAPI { const char* retry_config, int timeout_millisec, TessResultRenderer* renderer, int tessedit_page_number); - // TIFF supports multipage so gets special consideration + // TIFF supports multipage so gets special consideration. bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size, const char* filename, diff --git a/api/pdfrenderer.cpp b/api/pdfrenderer.cpp index 4708300492..dc90c5a3bf 100644 --- a/api/pdfrenderer.cpp +++ b/api/pdfrenderer.cpp @@ -20,12 +20,12 @@ #include "config_auto.h" #endif +#include "allheaders.h" #include "baseapi.h" -#include "renderer.h" #include "math.h" +#include "renderer.h" #include "strngs.h" #include "tprintf.h" -#include "allheaders.h" #ifdef _MSC_VER #include "mathfix.h" @@ -282,7 +282,7 @@ void AffineMatrix(int writing_direction, } } -// There are some really stupid PDF viewers in the wild, such as +// There are some really awkward PDF viewers in the wild, such as // 'Preview' which ships with the Mac. They do a better job with text // selection and highlighting when given perfectly flat baseline // instead of very slightly tilted. 
We clip small tilts to appease @@ -441,8 +441,8 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, int code = unicodes[i]; // Convert to UTF-16BE https://en.wikipedia.org/wiki/UTF-16 if ((code > 0xD7FF && code < 0xE000) || code > 0x10FFFF) { - tprintf("Dropping invalid codepoint %d\n", code); - continue; + tprintf("Dropping invalid codepoint %d\n", code); + continue; } if (code < 0x10000) { snprintf(utf16, sizeof(utf16), "<%04X>", code); @@ -567,7 +567,8 @@ bool TessPDFRenderer::BeginDocumentHandler() { "<<\n" " /Length %lu /Filter /FlateDecode\n" ">>\n" - "stream\n", (unsigned long)len); + "stream\n", + (unsigned long)len); if (n >= sizeof(buf)) { lept_free(comp); return false; diff --git a/api/renderer.cpp b/api/renderer.cpp index 4a88a24608..e683149381 100644 --- a/api/renderer.cpp +++ b/api/renderer.cpp @@ -155,11 +155,11 @@ TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info) bool TessHOcrRenderer::BeginDocumentHandler() { AppendString( - "\n" - "\n" - "\n \n "); + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n" + " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" " + "lang=\"en\">\n <head>\n <title>"); AppendString(title()); AppendString( "\n" @@ -198,25 +198,25 @@ bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) { /********************************************************************** * TSV Text Renderer interface implementation **********************************************************************/ -TessTsvRenderer::TessTsvRenderer(const char *outputbase) +TessTsvRenderer::TessTsvRenderer(const char* outputbase) : TessResultRenderer(outputbase, "tsv") { - font_info_ = false; + font_info_ = false; } -TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info) +TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info) : 
TessResultRenderer(outputbase, "tsv") { - font_info_ = font_info; + font_info_ = font_info; } bool TessTsvRenderer::BeginDocumentHandler() { // Output TSV column headings - AppendString("level\tpage_num\tblock_num\tpar_num\tline_num\tword_num\tleft\ttop\twidth\theight\tconf\ttext\n"); + AppendString( + "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" + "num\tleft\ttop\twidth\theight\tconf\ttext\n"); return true; } -bool TessTsvRenderer::EndDocumentHandler() { - return true; -} +bool TessTsvRenderer::EndDocumentHandler() { return true; } bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { char* tsv = api->GetTSVText(imagenum()); @@ -266,8 +266,7 @@ bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) { * Osd Text Renderer interface implementation **********************************************************************/ TessOsdRenderer::TessOsdRenderer(const char* outputbase) - : TessResultRenderer(outputbase, "osd") { -} + : TessResultRenderer(outputbase, "osd") {} bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) { char* osd = api->GetOsdText(imagenum()); diff --git a/api/renderer.h b/api/renderer.h index 6b47813f7b..ac64e4b452 100644 --- a/api/renderer.h +++ b/api/renderer.h @@ -126,7 +126,7 @@ class TESS_API TessResultRenderer { private: const char* file_extension_; // standard extension for generated output - const char* title_; // title of document being renderered + const char* title_; // title of document being renderered int imagenum_; // index of last image added FILE* fout_; // output file pointer @@ -153,13 +153,13 @@ class TESS_API TessHOcrRenderer : public TessResultRenderer { explicit TessHOcrRenderer(const char *outputbase, bool font_info); explicit TessHOcrRenderer(const char *outputbase); -protected: + protected: virtual bool BeginDocumentHandler(); virtual bool AddImageHandler(TessBaseAPI* api); virtual bool EndDocumentHandler(); -private: - bool font_info_; // whether to print font information + private: + bool font_info_; 
// whether to print font information }; /** @@ -167,15 +167,15 @@ class TESS_API TessHOcrRenderer : public TessResultRenderer { */ class TESS_API TessTsvRenderer : public TessResultRenderer { public: - explicit TessTsvRenderer(const char *outputbase, bool font_info); - explicit TessTsvRenderer(const char *outputbase); + explicit TessTsvRenderer(const char* outputbase, bool font_info); + explicit TessTsvRenderer(const char* outputbase); -protected: + protected: virtual bool BeginDocumentHandler(); virtual bool AddImageHandler(TessBaseAPI* api); virtual bool EndDocumentHandler(); -private: + private: bool font_info_; // whether to print font information }; @@ -188,12 +188,12 @@ class TESS_API TessPDFRenderer : public TessResultRenderer { // we load a custom PDF font from this location. TessPDFRenderer(const char *outputbase, const char *datadir); -protected: + protected: virtual bool BeginDocumentHandler(); virtual bool AddImageHandler(TessBaseAPI* api); virtual bool EndDocumentHandler(); -private: + private: // We don't want to have every image in memory at once, // so we store some metadata as we go along producing // PDFs one page at a time. 
At the end that metadata is diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 3fe8dc8c53..71a0a6a8b7 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -40,28 +40,28 @@ static void Win32WarningHandler(const char* module, const char* fmt, va_list ap) { - if (module != NULL) { - fprintf(stderr, "%s: ", module); - } - fprintf(stderr, "Warning, "); - vfprintf(stderr, fmt, ap); - fprintf(stderr, ".\n"); + if (module != NULL) { + fprintf(stderr, "%s: ", module); + } + fprintf(stderr, "Warning, "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, ".\n"); } #endif /* HAVE_TIFFIO_H && _WIN32 */ void PrintVersionInfo() { - char *versionStrP; + char* versionStrP; - printf("tesseract %s\n", tesseract::TessBaseAPI::Version()); + printf("tesseract %s\n", tesseract::TessBaseAPI::Version()); - versionStrP = getLeptonicaVersion(); - printf(" %s\n", versionStrP); - lept_free(versionStrP); + versionStrP = getLeptonicaVersion(); + printf(" %s\n", versionStrP); + lept_free(versionStrP); - versionStrP = getImagelibVersions(); - printf(" %s\n", versionStrP); - lept_free(versionStrP); + versionStrP = getImagelibVersions(); + printf(" %s\n", versionStrP); + lept_free(versionStrP); #ifdef USE_OPENCL cl_platform_id platform; @@ -82,7 +82,7 @@ void PrintVersionInfo() { printf(" Found %d devices.\n", num_devices); for (i = 0; i < num_devices; ++i) { clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0); - printf(" Device %d name: %s.\n", i+1, info); + printf(" Device %d name: %s.\n", i + 1, info); } #endif } @@ -90,7 +90,7 @@ void PrintVersionInfo() { void PrintUsage(const char* program) { printf( "Usage:\n" - " %s --help | --help-psm | --version\n" + " %s --help | --help-psm | --help-oem | --version\n" " %s --list-langs [--tessdata-dir PATH]\n" " %s --print-parameters [options...] [configfile...]\n" " %s imagename|stdin outputbase|stdout [options...] 
[configfile...]\n", @@ -100,28 +100,26 @@ void PrintUsage(const char* program) { void PrintHelpForPSM() { const char* msg = "Page segmentation modes:\n" - " 0 Orientation and script detection (OSD) only.\n" - " 1 Automatic page segmentation with OSD.\n" - " 2 Automatic page segmentation, but no OSD, or OCR.\n" - " 3 Fully automatic page segmentation, but no OSD. (Default)\n" - " 4 Assume a single column of text of variable sizes.\n" - " 5 Assume a single uniform block of vertically aligned text.\n" - " 6 Assume a single uniform block of text.\n" - " 7 Treat the image as a single text line.\n" - " 8 Treat the image as a single word.\n" - " 9 Treat the image as a single word in a circle.\n" - " 10 Treat the image as a single character.\n" - - //TODO: Consider publishing these modes. - #if 0 - " 11 Sparse text. Find as much text as possible in no" - " particular order.\n" - " 12 Sparse text with OSD.\n" - " 13 Raw line. Treat the image as a single text line,\n" - "\t\t\tbypassing hacks that are Tesseract-specific.\n" - #endif - ; - + " 0 Orientation and script detection (OSD) only.\n" + " 1 Automatic page segmentation with OSD.\n" + " 2 Automatic page segmentation, but no OSD, or OCR.\n" + " 3 Fully automatic page segmentation, but no OSD. (Default)\n" + " 4 Assume a single column of text of variable sizes.\n" + " 5 Assume a single uniform block of vertically aligned text.\n" + " 6 Assume a single uniform block of text.\n" + " 7 Treat the image as a single text line.\n" + " 8 Treat the image as a single word.\n" + " 9 Treat the image as a single word in a circle.\n" + " 10 Treat the image as a single character.\n" + //TODO: Consider publishing these modes. + #if 0 + " 11 Sparse text. Find as much text as possible in no" + " particular order.\n" + " 12 Sparse text with OSD.\n" + " 13 Raw line. 
Treat the image as a single text line,\n" + "\t\t\tbypassing hacks that are Tesseract-specific.\n" + #endif + ; printf("%s", msg); } @@ -137,31 +135,30 @@ void PrintHelpMessage(const char* program) { " -c VAR=VALUE Set value for config variables.\n" " Multiple -c arguments are allowed.\n" " -psm NUM Specify page segmentation mode.\n" - "NOTE: These options must occur before any configfile.\n" - ; + "NOTE: These options must occur before any configfile.\n"; printf("\n%s\n", ocr_options); PrintHelpForPSM(); - const char *single_options = + const char* single_options = "Single options:\n" " -h, --help Show this help message.\n" " --help-psm Show page segmentation modes.\n" " -v, --version Show version information.\n" " --list-langs List available languages for tesseract engine.\n" - " --print-parameters Print tesseract parameters to stdout.\n" - ; + " --print-parameters Print tesseract parameters to stdout.\n"; printf("\n%s", single_options); } -void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) { +void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, + char** argv) { char opt1[256], opt2[255]; for (int i = 0; i < argc; i++) { if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { strncpy(opt1, argv[i + 1], 255); opt1[255] = '\0'; - char *p = strchr(opt1, '='); + char* p = strchr(opt1, '='); if (!p) { fprintf(stderr, "Missing = in configvar assignment\n"); exit(1); @@ -190,8 +187,8 @@ void PrintLangsList(tesseract::TessBaseAPI* api) { } void PrintBanner() { - tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", - tesseract::TessBaseAPI::Version()); + tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", + tesseract::TessBaseAPI::Version()); } /** @@ -209,31 +206,25 @@ void PrintBanner() { * but that doesn't work. 
*/ void FixPageSegMode(tesseract::TessBaseAPI* api, - tesseract::PageSegMode pagesegmode) { + tesseract::PageSegMode pagesegmode) { if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) - api->SetPageSegMode(pagesegmode); + api->SetPageSegMode(pagesegmode); } // NOTE: arg_i is used here to avoid ugly *i so many times in this function -void ParseArgs(const int argc, char** argv, - const char** lang, - const char** image, - const char** outputbase, - const char** datapath, - bool* list_langs, - bool* print_parameters, - GenericVector* vars_vec, - GenericVector* vars_values, - int* arg_i, - tesseract::PageSegMode* pagesegmode) { +void ParseArgs(const int argc, char** argv, const char** lang, + const char** image, const char** outputbase, + const char** datapath, bool* list_langs, bool* print_parameters, + GenericVector* vars_vec, + GenericVector* vars_values, int* arg_i, + tesseract::PageSegMode* pagesegmode) { if (argc == 1) { PrintHelpMessage(argv[0]); exit(0); } if (argc == 2) { - if ((strcmp(argv[1], "-h") == 0) || - (strcmp(argv[1], "--help") == 0)) { + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { PrintHelpMessage(argv[0]); exit(0); } @@ -241,8 +232,7 @@ void ParseArgs(const int argc, char** argv, PrintHelpForPSM(); exit(0); } - if ((strcmp(argv[1], "-v") == 0) || - (strcmp(argv[1], "--version") == 0)) { + if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) { PrintVersionInfo(); exit(0); } @@ -298,10 +288,10 @@ void ParseArgs(const int argc, char** argv, } } -void PreloadRenderers(tesseract::TessBaseAPI* api, - tesseract::PointerVector* renderers, - tesseract::PageSegMode pagesegmode, - const char* outputbase) { +void PreloadRenderers( + tesseract::TessBaseAPI* api, + tesseract::PointerVector* renderers, + tesseract::PageSegMode pagesegmode, const char* outputbase) { if (pagesegmode == tesseract::PSM_OSD_ONLY) { renderers->push_back(new tesseract::TessOsdRenderer(outputbase)); } else { @@ -311,7 +301,7 @@ void 
PreloadRenderers(tesseract::TessBaseAPI* api, bool font_info; api->GetBoolVariable("hocr_font_info", &font_info); renderers->push_back( - new tesseract::TessHOcrRenderer(outputbase, font_info)); + new tesseract::TessHOcrRenderer(outputbase, font_info)); } api->GetBoolVariable("tessedit_create_tsv", &b); @@ -324,8 +314,8 @@ void PreloadRenderers(tesseract::TessBaseAPI* api, api->GetBoolVariable("tessedit_create_pdf", &b); if (b) { - renderers->push_back(new tesseract::TessPDFRenderer(outputbase, - api->GetDatapath())); + renderers->push_back( + new tesseract::TessPDFRenderer(outputbase, api->GetDatapath())); } api->GetBoolVariable("tessedit_write_unlv", &b); @@ -359,8 +349,7 @@ void PreloadRenderers(tesseract::TessBaseAPI* api, * **********************************************************************/ - -int main(int argc, char **argv) { +int main(int argc, char** argv) { const char* lang = "eng"; const char* image = NULL; const char* outputbase = NULL; @@ -380,10 +369,8 @@ int main(int argc, char **argv) { TIFFSetWarningHandler(Win32WarningHandler); #endif /* HAVE_TIFFIO_H && _WIN32 */ - ParseArgs(argc, argv, - &lang, &image, &outputbase, &datapath, - &list_langs, &print_parameters, - &vars_vec, &vars_values, &arg_i, &pagesegmode); + ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs, + &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode); bool banner = false; if (outputbase != NULL && strcmp(outputbase, "-") && @@ -406,8 +393,8 @@ int main(int argc, char **argv) { SetVariablesFromCLArgs(&api, argc, argv); if (list_langs) { - PrintLangsList(&api); - exit(0); + PrintLangsList(&api); + exit(0); } if (print_parameters) { @@ -436,12 +423,13 @@ int main(int argc, char **argv) { tesseract::TextlineOrder order; float deskew_angle; - tesseract::PageIterator* it = api.AnalyseLayout(); + tesseract::PageIterator* it = api.AnalyseLayout(); if (it) { it->Orientation(&orientation, &direction, &order, &deskew_angle); - tprintf("Orientation: 
%d\nWritingDirection: %d\nTextlineOrder: %d\n" \ - "Deskew angle: %.4f\n", - orientation, direction, order, deskew_angle); + tprintf( + "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" + "Deskew angle: %.4f\n", + orientation, direction, order, deskew_angle); } else { ret_val = 1; } @@ -456,14 +444,12 @@ int main(int argc, char **argv) { // ambigs.train, box.train, box.train.stderr, linebox, rebox bool b = false; bool in_training_mode = - (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || - (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || - (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); + (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || + (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || + (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); tesseract::PointerVector renderers; - - if (in_training_mode) { renderers.push_back(NULL); } else { diff --git a/ccmain/control.cpp b/ccmain/control.cpp index 3970c5429e..5953698932 100644 --- a/ccmain/control.cpp +++ b/ccmain/control.cpp @@ -1,8 +1,8 @@ /****************************************************************** * File: control.cpp (Formerly control.c) * Description: Module-independent matcher controller. - * Author: Ray Smith - * Created: Thu Apr 23 11:09:58 BST 1992 + * Author: Ray Smith + * Created: Thu Apr 23 11:09:58 BST 1992 * ReHacked: Tue Sep 22 08:42:49 BST 1992 Phil Cheatle * * (C) Copyright 1992, Hewlett-Packard Ltd. @@ -73,7 +73,6 @@ void Tesseract::recog_pseudo_word(PAGE_RES* page_res, } } - /** * Recognize a single word in interactive mode. 
* @@ -219,16 +218,14 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, if (pass_n == 1) { monitor->progress = 70 * w / words->size(); if (monitor->progress_callback != NULL) { - TBOX box = pr_it->word()->word->bounding_box(); - (*monitor->progress_callback)(monitor->progress, - box.left(), box.right(), - box.top(), box.bottom()); + TBOX box = pr_it->word()->word->bounding_box(); + (*monitor->progress_callback)(monitor->progress, box.left(), + box.right(), box.top(), box.bottom()); } } else { monitor->progress = 70 + 30 * w / words->size(); - if (monitor->progress_callback!=NULL) { - (*monitor->progress_callback)(monitor->progress, - 0, 0, 0, 0); + if (monitor->progress_callback != NULL) { + (*monitor->progress_callback)(monitor->progress, 0, 0, 0, 0); } } if (monitor->deadline_exceeded() || diff --git a/ccmain/docqual.cpp b/ccmain/docqual.cpp index c6e7f17e0f..4706fb3b26 100644 --- a/ccmain/docqual.cpp +++ b/ccmain/docqual.cpp @@ -1,8 +1,8 @@ /****************************************************************** * File: docqual.cpp (Formerly docqual.c) * Description: Document Quality Metrics - * Author: Phil Cheatle - * Created: Mon May 9 11:27:28 BST 1994 + * Author: Phil Cheatle + * Created: Mon May 9 11:27:28 BST 1994 * * (C) Copyright 1994, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -98,8 +98,8 @@ void Tesseract::word_char_quality(WERD_RES *word, ROW *row, inT16 *match_count, inT16 *accepted_match_count) { - if (word->bln_boxes == NULL || - word->rebuild_word == NULL || word->rebuild_word->blobs.empty()) { + if (word->bln_boxes == NULL || word->rebuild_word == NULL || + word->rebuild_word->blobs.empty()) { *match_count = 0; *accepted_match_count = 0; return; @@ -132,7 +132,7 @@ inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) { int expected_outline_count; if (STRING (outlines_odd).contains (c)) - return 0; //Don't use this char + return 0; // Don't use this char else if (STRING (outlines_2).contains (c)) expected_outline_count = 2; else @@ -151,17 +151,16 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, } } - /************************************************************************* * unrej_good_quality_words() * Accept potential rejects in words which pass the following checks: * - Contains a potential reject * - Word looks like a sensible alpha word. 
* - Word segmentation is the same as the original image - * - All characters have the expected number of outlines + * - All characters have the expected number of outlines * NOTE - the rejection counts are recalculated after unrejection * - CAN'T do it in a single pass without a bit of fiddling - * - keep it simple but inefficient + * - keep it simple but inefficient *************************************************************************/ void Tesseract::unrej_good_quality_words( //unreject potential PAGE_RES_IT &page_res_it) { @@ -403,7 +402,6 @@ void Tesseract::doc_and_block_rejection( //reject big chunks } // namespace tesseract - /************************************************************************* * reject_whole_page() * Don't believe any of it - set the reject map to 00..00 in all words diff --git a/ccmain/fixspace.cpp b/ccmain/fixspace.cpp index f58c9610fa..5fbe8c9a4e 100644 --- a/ccmain/fixspace.cpp +++ b/ccmain/fixspace.cpp @@ -3,8 +3,8 @@ * Description: Implements a pass over the page res, exploring the alternative * spacing possibilities, trying to use context to improve the * word spacing -* Author: Phil Cheatle -* Created: Thu Oct 21 11:38:43 BST 1993 +* Author: Phil Cheatle +* Created: Thu Oct 21 11:38:43 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -211,7 +211,6 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, } } - /** * @name eval_word_spacing() * The basic measure is the number of characters in contextually confirmed diff --git a/ccmain/ltrresultiterator.cpp b/ccmain/ltrresultiterator.cpp index d5b8594667..f80e594518 100644 --- a/ccmain/ltrresultiterator.cpp +++ b/ccmain/ltrresultiterator.cpp @@ -145,13 +145,12 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const { return 0.0f; } -void LTRResultIterator::RowAttributes(float* row_height, - float* descenders, +void LTRResultIterator::RowAttributes(float* row_height, float* descenders, float* ascenders) const { - *row_height = it_->row()->row->x_height() + it_->row()-> row->ascenders() - - it_->row()->row->descenders(); - *descenders = it_->row()->row->descenders(); - *ascenders = it_->row()->row->ascenders(); + *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() - + it_->row()->row->descenders(); + *descenders = it_->row()->row->descenders(); + *ascenders = it_->row()->row->ascenders(); } // Returns the font attributes of the current word. If iterating at a higher diff --git a/ccmain/ltrresultiterator.h b/ccmain/ltrresultiterator.h index 8819c2a0ee..f2605b52d2 100644 --- a/ccmain/ltrresultiterator.h +++ b/ccmain/ltrresultiterator.h @@ -92,8 +92,7 @@ class TESS_API LTRResultIterator : public PageIterator { float Confidence(PageIteratorLevel level) const; // Returns the attributes of the current row. - void RowAttributes(float* row_height, - float* descenders, + void RowAttributes(float* row_height, float* descenders, float* ascenders) const; // ============= Functions that refer to words only ============. 
diff --git a/ccmain/output.cpp b/ccmain/output.cpp index ddfcfc54b6..6fca63e420 100644 --- a/ccmain/output.cpp +++ b/ccmain/output.cpp @@ -1,8 +1,8 @@ /****************************************************************** * File: output.cpp (Formerly output.c) * Description: Output pass - * Author: Phil Cheatle - * Created: Thu Aug 4 10:56:08 BST 1994 + * Author: Phil Cheatle + * Created: Thu Aug 4 10:56:08 BST 1994 * * (C) Copyright 1994, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -78,18 +78,16 @@ void Tesseract::output_pass( //Tess output pass //send to api while (page_res_it.word () != NULL) { check_debug_pt (page_res_it.word (), 120); - if (target_word_box) - { - - TBOX current_word_box=page_res_it.word ()->word->bounding_box(); - FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2); - if (!target_word_box->contains(center_pt)) - { - page_res_it.forward (); - continue; - } - - } + if (target_word_box) { + TBOX current_word_box = page_res_it.word()->word->bounding_box(); + FCOORD center_pt( + (current_word_box.right() + current_word_box.left()) / 2, + (current_word_box.bottom() + current_word_box.top()) / 2); + if (!target_word_box->contains(center_pt)) { + page_res_it.forward(); + continue; + } + } if (tessedit_write_block_separators && block_of_last_word != page_res_it.block ()) { block_of_last_word = page_res_it.block (); @@ -337,7 +335,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) { rating_per_ch = word.rating() / word_res->reject_map.length(); if (rating_per_ch >= suspect_rating_per_ch) - return; //Don't touch bad ratings + return; // Don't touch bad ratings if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) { /* Unreject any Tess Acceptable word - but NOT tess reject chs*/ diff --git a/ccmain/pagesegmain.cpp b/ccmain/pagesegmain.cpp index 4e3c342070..f4401d64d9 100644 --- a/ccmain/pagesegmain.cpp +++ 
b/ccmain/pagesegmain.cpp @@ -412,9 +412,10 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( "Don't rotate.\n", osd_margin); osd_orientation = 0; } else { - tprintf("OSD: Weak margin (%.2f) for %d blob text block, " - "but using orientation anyway: %d\n", - osd_margin, osd_blobs.length(), osd_orientation); + tprintf( + "OSD: Weak margin (%.2f) for %d blob text block, " + "but using orientation anyway: %d\n", + osd_margin, osd_blobs.length(), osd_orientation); } } } diff --git a/ccmain/paramsd.cpp b/ccmain/paramsd.cpp index 7784f85361..e0e60539f6 100644 --- a/ccmain/paramsd.cpp +++ b/ccmain/paramsd.cpp @@ -329,13 +329,19 @@ void ParamsEditor::WriteParams(char *filename, fclose(fp); sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename); int a = sv_window_->ShowYesNoDialog(msg_str); - if (a == 'n') { return; } // don't write + if (a == 'n') { + return; + } // don't write } fp = fopen (filename, "wb"); // can we write to it? if (fp == NULL) { - sv_window_->AddMessage("Can't write to file " "%s" "", filename); + sv_window_->AddMessage( + "Can't write to file " + "%s" + "", + filename); return; } diff --git a/ccmain/reject.cpp b/ccmain/reject.cpp index aacc80dd6e..72f9d873d1 100644 --- a/ccmain/reject.cpp +++ b/ccmain/reject.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: reject.cpp (Formerly reject.c) * Description: Rejection functions used in tessedit - * Author: Phil Cheatle - * Created: Wed Sep 23 16:50:21 BST 1992 + * Author: Phil Cheatle + * Created: Wed Sep 23 16:50:21 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccstruct/blobbox.cpp b/ccstruct/blobbox.cpp index 280096b5d3..47a625af32 100644 --- a/ccstruct/blobbox.cpp +++ b/ccstruct/blobbox.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: blobbox.cpp (Formerly blobnbox.c) * Description: Code for the textord blob class. - * Author: Ray Smith - * Created: Thu Jul 30 09:08:51 BST 1992 + * Author: Ray Smith + * Created: Thu Jul 30 09:08:51 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccstruct/boxread.cpp b/ccstruct/boxread.cpp index f4aedca5b3..fee0aa9aef 100644 --- a/ccstruct/boxread.cpp +++ b/ccstruct/boxread.cpp @@ -34,8 +34,7 @@ FILE* OpenBoxFile(const STRING& fname) { STRING filename = BoxFileName(fname); FILE* box_file = NULL; if (!(box_file = fopen(filename.string(), "rb"))) { - CANTOPENFILE.error("read_next_box", TESSEXIT, - "Can't open box file %s", + CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s", filename.string()); } return box_file; diff --git a/ccstruct/boxword.h b/ccstruct/boxword.h index 742bbb8e4d..c1fab068bb 100644 --- a/ccstruct/boxword.h +++ b/ccstruct/boxword.h @@ -82,9 +82,7 @@ class BoxWord { const TBOX& bounding_box() const { return bbox_; } - int length() const { - return length_; - } + int length() const { return length_; } const TBOX& BlobBox(int index) const { return boxes_[index]; } diff --git a/ccstruct/coutln.cpp b/ccstruct/coutln.cpp index bc2b119d8c..238272d2c6 100644 --- a/ccstruct/coutln.cpp +++ b/ccstruct/coutln.cpp @@ -48,9 +48,9 @@ ICOORD C_OUTLINE::step_coords[4] = { * @param length length of loop */ -C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left, - ICOORD top_right, inT16 length) - : box (bot_left, top_right), start (startpt->pos), offsets(NULL) { +C_OUTLINE::C_OUTLINE(CRACKEDGE* startpt, ICOORD bot_left, ICOORD top_right, + inT16 
length) + : box(bot_left, top_right), start(startpt->pos), offsets(NULL) { inT16 stepindex; //index to step CRACKEDGE *edgept; //current point @@ -71,7 +71,6 @@ C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left, } } - /** * @name C_OUTLINE::C_OUTLINE * @@ -139,7 +138,7 @@ inT16 length //length of loop * @param rotation rotate to coord */ -C_OUTLINE::C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation) : offsets(NULL) { +C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(NULL) { TBOX new_box; //easy bounding inT16 stepindex; //index to step inT16 dirdiff; //direction change @@ -300,7 +299,6 @@ inT32 C_OUTLINE::perimeter() const { return total_steps; } - /** * @name C_OUTLINE::outer_area * @@ -332,7 +330,6 @@ inT32 C_OUTLINE::outer_area() const { return total; } - /** * @name C_OUTLINE::count_transitions * @@ -459,7 +456,6 @@ inT32 C_OUTLINE::count_transitions(inT32 threshold) { return total; } - /** * @name C_OUTLINE::operator< * @@ -468,8 +464,7 @@ inT32 C_OUTLINE::count_transitions(inT32 threshold) { */ BOOL8 -C_OUTLINE::operator< (const C_OUTLINE & other) const -{ +C_OUTLINE::operator<(const C_OUTLINE& other) const { inT16 count = 0; //winding count ICOORD pos; //position of point inT32 stepindex; //index to cstep @@ -495,7 +490,6 @@ C_OUTLINE::operator< (const C_OUTLINE & other) const return count != 0; } - /** * @name C_OUTLINE::winding_number * @@ -534,7 +528,6 @@ inT16 C_OUTLINE::winding_number(ICOORD point) const { return count; //winding number } - /** * C_OUTLINE::turn_direction * @@ -563,7 +556,6 @@ inT16 C_OUTLINE::turn_direction() const { //winding number return count; //winding number } - /** * @name C_OUTLINE::reverse * @@ -586,7 +578,6 @@ void C_OUTLINE::reverse() { //reverse drection } } - /** * @name C_OUTLINE::move * @@ -661,14 +652,27 @@ static void ComputeGradient(const l_uint32* data, int wpl, int x, int y, int width, int height, ICOORD* gradient) { const l_uint32* line = data + y * wpl; - int pix_x_y = x < width && y < 
height ? - GET_DATA_BYTE(const_cast (reinterpret_cast(line)), x) : 255; - int pix_x_prevy = x < width && y > 0 ? - GET_DATA_BYTE(const_cast (reinterpret_cast(line - wpl)), x) : 255; - int pix_prevx_prevy = x > 0 && y > 0 ? - GET_DATA_BYTE(const_cast (reinterpret_cast(line - wpl)), x - 1) : 255; - int pix_prevx_y = x > 0 && y < height ? - GET_DATA_BYTE(const_cast (reinterpret_cast(line)), x - 1) : 255; + int pix_x_y = + x < width && y < height + ? GET_DATA_BYTE( + const_cast(reinterpret_cast(line)), x) + : 255; + int pix_x_prevy = + x < width && y > 0 + ? GET_DATA_BYTE( + const_cast(reinterpret_cast(line - wpl)), x) + : 255; + int pix_prevx_prevy = + x > 0 && y > 0 + ? GET_DATA_BYTE( + const_cast(reinterpret_cast(line - wpl)), + x - 1) + : 255; + int pix_prevx_y = + x > 0 && y < height + ? GET_DATA_BYTE( + const_cast(reinterpret_cast(line)), x - 1) + : 255; gradient->set_x(pix_x_y + pix_x_prevy - (pix_prevx_y + pix_prevx_prevy)); gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y)); } @@ -684,8 +688,10 @@ static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign, if (y <= 0 || y >= height) return false; const l_uint32* line = data + y * wpl; - int pixel1 = GET_DATA_BYTE(const_cast (reinterpret_cast(line - wpl)), x); - int pixel2 = GET_DATA_BYTE(const_cast (reinterpret_cast(line)), x); + int pixel1 = GET_DATA_BYTE( + const_cast(reinterpret_cast(line - wpl)), x); + int pixel2 = + GET_DATA_BYTE(const_cast(reinterpret_cast(line)), x); int diff = (pixel2 - pixel1) * diff_sign; if (diff > *best_diff) { *best_diff = diff; @@ -705,8 +711,10 @@ static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign, int* best_diff, int* best_sum, int* best_x) { if (x <= 0 || x >= width) return false; - int pixel1 = GET_DATA_BYTE(const_cast (reinterpret_cast(line)), x - 1); - int pixel2 = GET_DATA_BYTE(const_cast (reinterpret_cast(line)), x); + int pixel1 = GET_DATA_BYTE( + const_cast(reinterpret_cast(line)), x - 1); + int pixel2 = + 
GET_DATA_BYTE(const_cast(reinterpret_cast(line)), x); int diff = (pixel2 - pixel1) * diff_sign; if (diff > *best_diff) { *best_diff = diff; @@ -954,8 +962,7 @@ void C_OUTLINE::render_outline(int left, int top, Pix* pix) const { */ #ifndef GRAPHICS_DISABLED -void C_OUTLINE::plot(ScrollView* window, - ScrollView::Color colour) const { +void C_OUTLINE::plot(ScrollView* window, ScrollView::Color colour) const { inT16 stepindex; // index to cstep ICOORD pos; // current position DIR128 stepdir; // direction of step @@ -1016,7 +1023,6 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour, } #endif - /** * @name C_OUTLINE::operator= * @@ -1024,7 +1030,7 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour, * @param source assign from this */ -C_OUTLINE & C_OUTLINE::operator= (const C_OUTLINE & source) { +C_OUTLINE& C_OUTLINE::operator=(const C_OUTLINE& source) { box = source.box; start = source.start; if (steps != NULL) diff --git a/ccstruct/matrix.h b/ccstruct/matrix.h index e13ef31899..56d261cad7 100644 --- a/ccstruct/matrix.h +++ b/ccstruct/matrix.h @@ -1,7 +1,9 @@ /* -*-C-*- ****************************************************************************** * - * File: matrix.h (Formerly matrix.h) + * File: matrix.h (Formerly matrix.h) + * Description: Generic 2-d array/matrix and banded triangular matrix class. + * Author: Ray Smith * Description: Ratings matrix code. (Used by associator) * Author: Mark Seaman, OCR Technology * Created: Wed May 16 13:22:06 1990 diff --git a/ccstruct/mod128.cpp b/ccstruct/mod128.cpp index ee4aa6c3c6..17776a2783 100644 --- a/ccstruct/mod128.cpp +++ b/ccstruct/mod128.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: mod128.c (Formerly dir128.c) * Description: Code to convert a DIR128 to an ICOORD. 
- * Author: Ray Smith - * Created: Tue Oct 22 11:56:09 BST 1991 + * Author: Ray Smith + * Created: Tue Oct 22 11:56:09 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccstruct/mod128.h b/ccstruct/mod128.h index 592264ba62..9b31d83a64 100644 --- a/ccstruct/mod128.h +++ b/ccstruct/mod128.h @@ -1,8 +1,8 @@ /********************************************************************** * File: mod128.h (Formerly dir128.h) * Description: Header for class which implements modulo arithmetic. - * Author: Ray Smith - * Created: Tue Mar 26 17:48:13 GMT 1991 + * Author: Ray Smith + * Created: Tue Mar 26 17:48:13 GMT 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccstruct/pageres.h b/ccstruct/pageres.h index 7329bc89ea..fc84d4d0ea 100644 --- a/ccstruct/pageres.h +++ b/ccstruct/pageres.h @@ -1,7 +1,7 @@ /********************************************************************** * File: pageres.h (Formerly page_res.h) * Description: Results classes used by control.c - * Author: Phil Cheatle + * Author: Phil Cheatle * Created: Tue Sep 22 08:42:49 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. @@ -327,7 +327,7 @@ class WERD_RES : public ELIST_LINK { } // Deep copies everything except the ratings MATRIX. // To get that use deep_copy below. - WERD_RES(const WERD_RES &source) : ELIST_LINK(source) { + WERD_RES(const WERD_RES& source) : ELIST_LINK(source) { InitPointers(); *this = source; // see operator= } @@ -630,7 +630,7 @@ class WERD_RES : public ELIST_LINK { static WERD_RES* deep_copy(const WERD_RES* src) { WERD_RES* result = new WERD_RES(*src); // That didn't copy the ratings, but we want a copy if there is one to - // begin width. + // begin with. 
if (src->ratings != NULL) result->ratings = src->ratings->DeepCopy(); return result; diff --git a/ccstruct/pdblock.h b/ccstruct/pdblock.h index b64eff36d0..e9139f2ac5 100644 --- a/ccstruct/pdblock.h +++ b/ccstruct/pdblock.h @@ -29,90 +29,76 @@ struct Pix; CLISTIZEH (PDBLK) ///page block -class PDBLK -{ +class PDBLK { friend class BLOCK_RECT_IT; //< block iterator - public: - ///empty constructor - PDBLK() { - hand_poly = NULL; - index_ = 0; - } - ///simple constructor - PDBLK(inT16 xmin, //< bottom left - inT16 ymin, - inT16 xmax, //< top right - inT16 ymax); - - ///set vertex lists - ///@param left list of left vertices - ///@param right list of right vertices - void set_sides(ICOORDELT_LIST *left, - ICOORDELT_LIST *right); - - ///destructor - ~PDBLK () { - if (hand_poly) delete hand_poly; - } - - POLY_BLOCK *poly_block() const { - return hand_poly; - } - ///set the poly block - void set_poly_block(POLY_BLOCK *blk) { - hand_poly = blk; - } - ///get box - void bounding_box(ICOORD &bottom_left, //bottom left - ICOORD &top_right) const { //topright - bottom_left = box.botleft (); - top_right = box.topright (); - } - ///get real box - const TBOX &bounding_box() const { - return box; - } - - int index() const { - return index_; - } - void set_index(int value) { - index_ = value; - } - - ///is pt inside block - BOOL8 contains(ICOORD pt); - - /// reposition block - void move(const ICOORD vec); // by vector - - // Returns a binary Pix mask with a 1 pixel for every pixel within the - // block. Rotates the coordinate system by rerotation prior to rendering. - // If not NULL, mask_box is filled with the position box of the returned - // mask image. 
- Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box); - - #ifndef GRAPHICS_DISABLED - ///draw histogram - ///@param window window to draw in - ///@param serial serial number - ///@param colour colour to draw in - void plot(ScrollView* window, - inT32 serial, - ScrollView::Color colour); - #endif // GRAPHICS_DISABLED - - ///assignment - ///@param source from this - PDBLK & operator= (const PDBLK & source); - - protected: - POLY_BLOCK *hand_poly; //< weird as well - ICOORDELT_LIST leftside; //< left side vertices - ICOORDELT_LIST rightside; //< right side vertices - TBOX box; //< bounding box - int index_; //< Serial number of this block. + public: + /// empty constructor + PDBLK() { + hand_poly = NULL; + index_ = 0; + } + /// simple constructor + PDBLK(inT16 xmin, //< bottom left + inT16 ymin, + inT16 xmax, //< top right + inT16 ymax); + + /// set vertex lists + ///@param left list of left vertices + ///@param right list of right vertices + void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right); + + /// destructor + ~PDBLK() { + if (hand_poly) delete hand_poly; + } + + POLY_BLOCK *poly_block() const { return hand_poly; } + /// set the poly block + void set_poly_block(POLY_BLOCK *blk) { hand_poly = blk; } + /// get box + void bounding_box(ICOORD &bottom_left, // bottom left + ICOORD &top_right) const { // topright + bottom_left = box.botleft(); + top_right = box.topright(); + } + /// get real box + const TBOX &bounding_box() const { return box; } + + int index() const { return index_; } + void set_index(int value) { index_ = value; } + + /// is pt inside block + BOOL8 contains(ICOORD pt); + + /// reposition block + void move(const ICOORD vec); // by vector + + // Returns a binary Pix mask with a 1 pixel for every pixel within the + // block. Rotates the coordinate system by rerotation prior to rendering. + // If not NULL, mask_box is filled with the position box of the returned + // mask image. 
+ Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box); + +#ifndef GRAPHICS_DISABLED + /// draw histogram + ///@param window window to draw in + ///@param serial serial number + ///@param colour colour to draw in + void plot(ScrollView *window, inT32 serial, ScrollView::Color colour); +#endif // GRAPHICS_DISABLED + + /// assignment + ///@param source from this + PDBLK &operator=(const PDBLK &source); + + protected: + POLY_BLOCK *hand_poly; //< weird as well + ICOORDELT_LIST leftside; //< left side vertices + ICOORDELT_LIST rightside; //< right side vertices + TBOX box; //< bounding box + int index_; //< Serial number of this block. }; class DLLSYM BLOCK_RECT_IT //rectangle iterator diff --git a/ccstruct/polyaprx.cpp b/ccstruct/polyaprx.cpp index 81b8500a0e..7597349543 100644 --- a/ccstruct/polyaprx.cpp +++ b/ccstruct/polyaprx.cpp @@ -214,7 +214,7 @@ EDGEPT edgepts[] //output is array void fix2( //polygonal approx EDGEPT *start, /*loop to approimate */ int area) { - EDGEPT *edgept; /*current point */ + EDGEPT *edgept; /*current point */ EDGEPT *edgept1; EDGEPT *loopstart; /*modified start of loop */ EDGEPT *linestart; /*start of line segment */ diff --git a/ccstruct/polyblk.cpp b/ccstruct/polyblk.cpp index e0a455905f..b5ca2e1212 100644 --- a/ccstruct/polyblk.cpp +++ b/ccstruct/polyblk.cpp @@ -1,7 +1,7 @@ /********************************************************************** * File: polyblk.c (Formerly poly_block.c) * Description: Polygonal blocks - * Author: Sheelagh Lloyd? + * Author: Sheelagh Lloyd? * Created: * * (C) Copyright 1993, Hewlett-Packard Ltd. diff --git a/ccstruct/quspline.cpp b/ccstruct/quspline.cpp index f50cfe50ee..82107e1e25 100644 --- a/ccstruct/quspline.cpp +++ b/ccstruct/quspline.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: quspline.cpp (Formerly qspline.c) * Description: Code for the QSPLINE class. 
- * Author: Ray Smith - * Created: Tue Oct 08 17:16:12 BST 1991 + * Author: Ray Smith + * Created: Tue Oct 08 17:16:12 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccstruct/ratngs.h b/ccstruct/ratngs.h index 31b27cfb37..446dfc6c5d 100644 --- a/ccstruct/ratngs.h +++ b/ccstruct/ratngs.h @@ -288,7 +288,8 @@ class WERD_CHOICE : public ELIST_LINK { src_certainty, src_permuter); } WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset); - WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) { + WERD_CHOICE(const WERD_CHOICE &word) + : ELIST_LINK(word), unicharset_(word.unicharset_) { this->init(word.length()); this->operator=(word); } diff --git a/ccstruct/rect.cpp b/ccstruct/rect.cpp index 22417485db..4a9fe00b34 100644 --- a/ccstruct/rect.cpp +++ b/ccstruct/rect.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: rect.c (Formerly box.c) * Description: Bounding box class definition. - * Author: Phil Cheatle - * Created: Wed Oct 16 15:18:45 BST 1991 + * Author: Phil Cheatle + * Created: Wed Oct 16 15:18:45 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,10 +29,10 @@ * **********************************************************************/ -TBOX::TBOX( //constructor - const ICOORD pt1, //one corner - const ICOORD pt2 //the other corner - ) { +TBOX::TBOX( // constructor + const ICOORD pt1, // one corner + const ICOORD pt2 // the other corner + ) { if (pt1.x () <= pt2.x ()) { if (pt1.y () <= pt2.y ()) { bot_left = pt1; diff --git a/ccstruct/rect.h b/ccstruct/rect.h index d9b90642f4..f31247a1af 100644 --- a/ccstruct/rect.h +++ b/ccstruct/rect.h @@ -1,8 +1,8 @@ /********************************************************************** * File: rect.h (Formerly box.h) * Description: Bounding box class definition. 
- * Author: Phil Cheatle - * Created: Wed Oct 16 15:18:45 BST 1991 + * Author: Phil Cheatle + * Created: Wed Oct 16 15:18:45 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -307,9 +307,9 @@ class DLLSYM TBOX { // bounding box * **********************************************************************/ -inline TBOX::TBOX( // constructor - const FCOORD pt // floating centre - ) { +inline TBOX::TBOX( // constructor + const FCOORD pt // floating centre + ) { bot_left = ICOORD ((inT16) floor (pt.x ()), (inT16) floor (pt.y ())); top_right = ICOORD ((inT16) ceil (pt.x ()), (inT16) ceil (pt.y ())); } diff --git a/ccstruct/rejctmap.h b/ccstruct/rejctmap.h index d945dda1fa..009ba58a78 100644 --- a/ccstruct/rejctmap.h +++ b/ccstruct/rejctmap.h @@ -1,8 +1,8 @@ /********************************************************************** * File: rejctmap.h (Formerly rejmap.h) * Description: REJ and REJMAP class functions. - * Author: Phil Cheatle - * Created: Thu Jun 9 13:46:38 BST 1994 + * Author: Phil Cheatle + * Created: Thu Jun 9 13:46:38 BST 1994 * * (C) Copyright 1994, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -48,46 +48,45 @@ OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!! 
#include "bits16.h" #include "params.h" -enum REJ_FLAGS -{ +enum REJ_FLAGS { /* Reject modes which are NEVER overridden */ - R_TESS_FAILURE, // PERM Tess didn't classify - R_SMALL_XHT, // PERM Xht too small - R_EDGE_CHAR, // PERM Too close to edge of image - R_1IL_CONFLICT, // PERM 1Il confusion - R_POSTNN_1IL, // PERM 1Il unrejected by NN - R_REJ_CBLOB, // PERM Odd blob - R_MM_REJECT, // PERM Matrix match rejection (m's) - R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend + R_TESS_FAILURE, // PERM Tess didn't classify + R_SMALL_XHT, // PERM Xht too small + R_EDGE_CHAR, // PERM Too close to edge of image + R_1IL_CONFLICT, // PERM 1Il confusion + R_POSTNN_1IL, // PERM 1Il unrejected by NN + R_REJ_CBLOB, // PERM Odd blob + R_MM_REJECT, // PERM Matrix match rejection (m's) + R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend /* Initial reject modes (pre NN_ACCEPT) */ - R_POOR_MATCH, // TEMP Ray's original heuristic (Not used) - R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD - R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD - R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD + R_POOR_MATCH, // TEMP Ray's original heuristic (Not used) + R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD + R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD + R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */ - R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop - R_DUBIOUS, // TEMP Post NN dodgy chars - R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN - R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest - R_XHT_FIXUP, // TEMP Xht tests unsure + R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop + R_DUBIOUS, // TEMP Post NN dodgy chars + R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN + R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest + R_XHT_FIXUP, // TEMP Xht tests unsure /* Reject modes generated after MM_ACCEPT but 
before QUALITY_ACCEPT */ - R_BAD_QUALITY, // TEMP Quality metrics bad for WERD + R_BAD_QUALITY, // TEMP Quality metrics bad for WERD /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accep*/ - R_DOC_REJ, // TEMP Document rejection - R_BLOCK_REJ, // TEMP Block rejection - R_ROW_REJ, // TEMP Row rejection - R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space + R_DOC_REJ, // TEMP Document rejection + R_BLOCK_REJ, // TEMP Block rejection + R_ROW_REJ, // TEMP Row rejection + R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space /* Accept modes which occur between the above rejection groups */ - R_NN_ACCEPT, //NN acceptance - R_HYPHEN_ACCEPT, //Hyphen acceptance - R_MM_ACCEPT, //Matrix match acceptance - R_QUALITY_ACCEPT, //Accept word in good quality doc - R_MINIMAL_REJ_ACCEPT //Accept EVERYTHING except tess failures + R_NN_ACCEPT, // NN acceptance + R_HYPHEN_ACCEPT, // Hyphen acceptance + R_MM_ACCEPT, // Matrix match acceptance + R_QUALITY_ACCEPT, // Accept word in good quality doc + R_MINIMAL_REJ_ACCEPT // Accept EVERYTHING except tess failures }; /* REJECT MAP VALUES */ diff --git a/ccstruct/statistc.cpp b/ccstruct/statistc.cpp index 39d5edd180..8b1ba8c9a1 100644 --- a/ccstruct/statistc.cpp +++ b/ccstruct/statistc.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: statistc.c (Formerly stats.c) * Description: Simple statistical package for integer values. - * Author: Ray Smith - * Created: Mon Feb 04 16:56:05 GMT 1991 + * Author: Ray Smith + * Created: Mon Feb 04 16:56:05 GMT 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -215,7 +215,6 @@ inT32 STATS::min_bucket() const { // Find min return rangemin_ + min; } - /********************************************************************** * STATS::max_bucket * diff --git a/ccutil/clst.cpp b/ccutil/clst.cpp index fbbb561fad..52caadf38f 100644 --- a/ccutil/clst.cpp +++ b/ccutil/clst.cpp @@ -26,7 +26,7 @@ **********************************************************************/ /*********************************************************************** - * CLIST::internal_deep_clear + * CLIST::internal_deep_clear * * Used by the "deep_clear" member function of derived list * classes to destroy all the elements on the list. @@ -56,9 +56,8 @@ void (*zapper) (void *)) { //ptr to zapper functn } } - /*********************************************************************** - * CLIST::shallow_clear + * CLIST::shallow_clear * * Used by the destructor and the "shallow_clear" member function of derived * list classes to destroy the list. @@ -83,7 +82,7 @@ void CLIST::shallow_clear() { //destroy all links } /*********************************************************************** - * CLIST::assign_to_sublist + * CLIST::assign_to_sublist * * The list is set to a sublist of another list. "This" list must be empty * before this function is invoked. 
The two iterators passed must refer to @@ -107,9 +106,8 @@ void CLIST::assign_to_sublist( //to this list last = start_it->extract_sublist (end_it); } - /*********************************************************************** - * CLIST::length + * CLIST::length * * Return count of elements on list **********************************************************************/ @@ -123,9 +121,8 @@ inT32 CLIST::length() const { //count elements return count; } - /*********************************************************************** - * CLIST::sort + * CLIST::sort * * Sort elements on list **********************************************************************/ @@ -239,7 +236,7 @@ void CLIST::set_subtract(int comparator(const void*, const void*), **********************************************************************/ /*********************************************************************** - * CLIST_ITERATOR::forward + * CLIST_ITERATOR::forward * * Move the iterator to the next element of the list. * REMEMBER: ALL LISTS ARE CIRCULAR. @@ -276,9 +273,8 @@ void *CLIST_ITERATOR::forward() { return current->data; } - /*********************************************************************** - * CLIST_ITERATOR::data_relative + * CLIST_ITERATOR::data_relative * * Return the data pointer to the element "offset" elements from current. * "offset" must not be less than -1. @@ -312,9 +308,8 @@ void *CLIST_ITERATOR::data_relative( //get data + or - ... return ptr->data; } - /*********************************************************************** - * CLIST_ITERATOR::move_to_last() + * CLIST_ITERATOR::move_to_last() * * Move current so that it is set to the end of the list. * Return data just in case anyone wants it. 
@@ -336,9 +331,8 @@ void *CLIST_ITERATOR::move_to_last() { return current->data; } - /*********************************************************************** - * CLIST_ITERATOR::exchange() + * CLIST_ITERATOR::exchange() * * Given another iterator, whose current element is a different element on * the same list list OR an element of another list, exchange the two current @@ -434,9 +428,8 @@ void CLIST_ITERATOR::exchange( //positions of 2 link other_it->current = old_current; } - /*********************************************************************** - * CLIST_ITERATOR::extract_sublist() + * CLIST_ITERATOR::extract_sublist() * * This is a private member, used only by CLIST::assign_to_sublist. * Given another iterator for the same list, extract the links from THIS to @@ -478,7 +471,7 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( //from temp_it.mark_cycle_pt (); do { //walk sublist - if (temp_it.cycled_list ()) //can't find end pt + if (temp_it.cycled_list()) // can't find end pt BAD_SUBLIST.error ("CLIST_ITERATOR.extract_sublist", ABORT, NULL); if (temp_it.at_last ()) { diff --git a/ccutil/clst.h b/ccutil/clst.h index a5a42a6e40..f93d75afcb 100644 --- a/ccutil/clst.h +++ b/ccutil/clst.h @@ -28,9 +28,9 @@ class CLIST_ITERATOR; /********************************************************************** - * CLASS - CLIST_LINK + * CLASS - CLIST_LINK * - * Generic link class for singly linked CONS cell lists + * Generic link class for singly linked CONS cell lists * * Note: No destructor - elements are assumed to be destroyed EITHER after * they have been extracted from a list OR by the CLIST destructor which @@ -50,13 +50,13 @@ class DLLSYM CLIST_LINK data = next = NULL; } - CLIST_LINK( //copy constructor - const CLIST_LINK &) { //don't copy link + CLIST_LINK( // copy constructor + const CLIST_LINK &) { // don't copy link data = next = NULL; } - void operator= ( //don't copy links - const CLIST_LINK &) { + void operator=( // don't copy links + const CLIST_LINK &) { data = 
next = NULL; } }; @@ -89,8 +89,8 @@ class DLLSYM CLIST void internal_deep_clear ( //destroy all links void (*zapper) (void *)); //ptr to zapper functn - void shallow_clear(); //clear list but don't - //delete data elements + void shallow_clear(); // clear list but don't + // delete data elements bool empty() const { //is list empty? return !last; @@ -136,9 +136,10 @@ class DLLSYM CLIST }; /*********************************************************************** - * CLASS - CLIST_ITERATOR + * CLASS - CLIST_ITERATOR * - * Generic iterator class for singly linked lists with embedded links + * Generic iterator class for singly linked lists with embedded + *links **********************************************************************/ class DLLSYM CLIST_ITERATOR @@ -231,8 +232,8 @@ class DLLSYM CLIST_ITERATOR BOOL8 cycled_list(); //Completed a cycle? - void add_to_end( //add at end & - void *new_data); //don't move + void add_to_end( // add at end & + void *new_data); // don't move void exchange( //positions of 2 links CLIST_ITERATOR *other_it); //other iterator @@ -246,7 +247,7 @@ class DLLSYM CLIST_ITERATOR }; /*********************************************************************** - * CLIST_ITERATOR::set_to_list + * CLIST_ITERATOR::set_to_list * * (Re-)initialise the iterator to point to the start of the list_to_iterate * over. 
@@ -270,9 +271,8 @@ inline void CLIST_ITERATOR::set_to_list( //change list ex_current_was_cycle_pt = FALSE; } - /*********************************************************************** - * CLIST_ITERATOR::CLIST_ITERATOR + * CLIST_ITERATOR::CLIST_ITERATOR * * CONSTRUCTOR - set iterator to specified list; **********************************************************************/ @@ -281,9 +281,8 @@ inline CLIST_ITERATOR::CLIST_ITERATOR(CLIST *list_to_iterate) { set_to_list(list_to_iterate); } - /*********************************************************************** - * CLIST_ITERATOR::add_after_then_move + * CLIST_ITERATOR::add_after_then_move * * Add a new element to the list after the current element and move the * iterator to the new element. @@ -329,9 +328,8 @@ inline void CLIST_ITERATOR::add_after_then_move( // element to add current = new_element; } - /*********************************************************************** - * CLIST_ITERATOR::add_after_stay_put + * CLIST_ITERATOR::add_after_stay_put * * Add a new element to the list after the current element but do not move * the iterator to the new element. @@ -380,9 +378,8 @@ inline void CLIST_ITERATOR::add_after_stay_put( // element to add } } - /*********************************************************************** - * CLIST_ITERATOR::add_before_then_move + * CLIST_ITERATOR::add_before_then_move * * Add a new element to the list before the current element and move the * iterator to the new element. @@ -425,9 +422,8 @@ inline void CLIST_ITERATOR::add_before_then_move( // element to add current = new_element; } - /*********************************************************************** - * CLIST_ITERATOR::add_before_stay_put + * CLIST_ITERATOR::add_before_stay_put * * Add a new element to the list before the current element but don't move the * iterator to the new element. 
@@ -471,11 +467,11 @@ inline void CLIST_ITERATOR::add_before_stay_put( // element to add } } - /*********************************************************************** - * CLIST_ITERATOR::add_list_after + * CLIST_ITERATOR::add_list_after * - * Insert another list to this list after the current element but don't move the + * Insert another list to this list after the current element but don't move + *the * iterator. **********************************************************************/ @@ -518,9 +514,8 @@ inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) { } } - /*********************************************************************** - * CLIST_ITERATOR::add_list_before + * CLIST_ITERATOR::add_list_before * * Insert another list to this list before the current element. Move the * iterator to the start of the inserted elements @@ -563,9 +558,8 @@ inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) { } } - /*********************************************************************** - * CLIST_ITERATOR::extract + * CLIST_ITERATOR::extract * * Do extraction by removing current from the list, deleting the cons cell * and returning the data to the caller, but NOT updating the iterator. (So @@ -606,9 +600,8 @@ inline void *CLIST_ITERATOR::extract() { return extracted_data; } - /*********************************************************************** - * CLIST_ITERATOR::move_to_first() + * CLIST_ITERATOR::move_to_first() * * Move current so that it is set to the start of the list. * Return data just in case anyone wants it. @@ -626,9 +619,8 @@ inline void *CLIST_ITERATOR::move_to_first() { return current != NULL ? current->data : NULL; } - /*********************************************************************** - * CLIST_ITERATOR::mark_cycle_pt() + * CLIST_ITERATOR::mark_cycle_pt() * * Remember the current location so that we can tell whether we've returned * to this point later. 
@@ -651,9 +643,8 @@ inline void CLIST_ITERATOR::mark_cycle_pt() { started_cycling = FALSE; } - /*********************************************************************** - * CLIST_ITERATOR::at_first() + * CLIST_ITERATOR::at_first() * * Are we at the start of the list? * @@ -671,9 +662,8 @@ inline BOOL8 CLIST_ITERATOR::at_first() { !ex_current_was_last)); //first and last } - /*********************************************************************** - * CLIST_ITERATOR::at_last() + * CLIST_ITERATOR::at_last() * * Are we at the end of the list? * @@ -691,9 +681,8 @@ inline BOOL8 CLIST_ITERATOR::at_last() { ex_current_was_last)); //first and last } - /*********************************************************************** - * CLIST_ITERATOR::cycled_list() + * CLIST_ITERATOR::cycled_list() * * Have we returned to the cycle_pt since it was set? * @@ -709,9 +698,8 @@ inline BOOL8 CLIST_ITERATOR::cycled_list() { } - /*********************************************************************** - * CLIST_ITERATOR::length() + * CLIST_ITERATOR::length() * * Return the length of the list * @@ -726,9 +714,8 @@ inline inT32 CLIST_ITERATOR::length() { return list->length (); } - /*********************************************************************** - * CLIST_ITERATOR::sort() + * CLIST_ITERATOR::sort() * * Sort the elements of the list, then reposition at the start. * @@ -747,9 +734,8 @@ const void *, const void *)) { move_to_first(); } - /*********************************************************************** - * CLIST_ITERATOR::add_to_end + * CLIST_ITERATOR::add_to_end * * Add a new element to the end of the list without moving the iterator. 
* This is provided because a single linked list cannot move to the last as @@ -811,7 +797,7 @@ The macro generates: - An element deletion function: CLASSNAME##_c1_zapper - An element copier function: CLASSNAME##_c1_copier - - A CLIST subclass: CLASSNAME##_CLIST + - A CLIST subclass: CLASSNAME##_CLIST - A CLIST_ITERATOR subclass: CLASSNAME##_C_IT @@ -830,114 +816,116 @@ CLISTIZEH is a concatenation of 3 fragments CLISTIZEH_A, CLISTIZEH_B and CLISTIZEH_C. ***********************************************************************/ -#define CLISTIZEH_A( CLASSNAME ) \ - \ -extern DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ -void* link); /*link to delete*/ \ - \ -extern DLLSYM void* CLASSNAME##_c1_copier( /*deep copy a link*/ \ -void* old_element); /*source link */ - -#define CLISTIZEH_B( CLASSNAME ) \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_CLIST \ -* \ -* List class for class CLASSNAME \ -* \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_CLIST : public CLIST \ -{ \ -public: \ - CLASSNAME##_CLIST():CLIST() {} \ - /* constructor */ \ - \ - CLASSNAME##_CLIST( /* don't construct */ \ - const CLASSNAME##_CLIST&) /*by initial assign*/ \ - { DONT_CONSTRUCT_LIST_BY_COPY.error( QUOTE_IT( CLASSNAME##_CLIST ), \ - ABORT, NULL ); } \ - \ -void deep_clear() /* delete elements */ \ - { CLIST::internal_deep_clear( &CLASSNAME##_c1_zapper ); } \ - \ -void operator=( /* prevent assign */ \ - const CLASSNAME##_CLIST&) \ - { DONT_ASSIGN_LISTS.error( QUOTE_IT( CLASSNAME##_CLIST ), \ - ABORT, NULL ); } - -#define CLISTIZEH_C( CLASSNAME ) \ - \ -}; \ - \ - \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_C_IT \ -* \ -* Iterator class for class CLASSNAME##_CLIST \ -* \ -* Note: We don't need to coerce pointers to member functions input \ -* parameters as these are automatically converted to the type of the 
base \ -* type. ("A ptr to a class may be converted to a pointer to a public base \ -* class of that class") \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_C_IT : public CLIST_ITERATOR \ -{ \ -public: \ - CLASSNAME##_C_IT():CLIST_ITERATOR(){} \ - \ - CLASSNAME##_C_IT( \ - CLASSNAME##_CLIST* list):CLIST_ITERATOR(list){} \ - \ - CLASSNAME* data() \ - { return (CLASSNAME*) CLIST_ITERATOR::data(); } \ - \ - CLASSNAME* data_relative( \ - inT8 offset) \ - { return (CLASSNAME*) CLIST_ITERATOR::data_relative( offset ); } \ - \ - CLASSNAME* forward() \ - { return (CLASSNAME*) CLIST_ITERATOR::forward(); } \ - \ - CLASSNAME* extract() \ - { return (CLASSNAME*) CLIST_ITERATOR::extract(); } \ - \ - CLASSNAME* move_to_first() \ - { return (CLASSNAME*) CLIST_ITERATOR::move_to_first(); } \ - \ - CLASSNAME* move_to_last() \ - { return (CLASSNAME*) CLIST_ITERATOR::move_to_last(); } \ -}; +#define CLISTIZEH_A(CLASSNAME) \ + \ + extern DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ + void *link); /*link to delete*/ \ + \ + extern DLLSYM void \ + *CLASSNAME##_c1_copier( /*deep copy a link*/ \ + void *old_element); /*source link */ + +#define CLISTIZEH_B(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASS - \ + *CLASSNAME##_CLIST \ + * \ + * List class for class \ + *CLASSNAME \ + * \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_CLIST : public CLIST { \ + public: \ + CLASSNAME##_CLIST() : CLIST() {} \ + /* constructor */ \ + \ + CLASSNAME##_CLIST( /* don't construct */ \ + const CLASSNAME##_CLIST &) /*by initial assign*/ \ + { \ + DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, \ + NULL); \ + } \ + \ + void deep_clear() /* delete elements */ \ + { \ + CLIST::internal_deep_clear(&CLASSNAME##_c1_zapper); \ + } \ + \ + void operator=(/* prevent assign */ \ + const CLASSNAME##_CLIST &) { \ 
+ DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, NULL); \ + } -#define CLISTIZEH( CLASSNAME ) \ - \ -CLISTIZEH_A( CLASSNAME ) \ - \ -CLISTIZEH_B( CLASSNAME ) \ - \ -CLISTIZEH_C( CLASSNAME ) +#define CLISTIZEH_C(CLASSNAME) \ + } \ + ; \ + \ + /*********************************************************************** \ + * CLASS - CLASSNAME##_C_IT \ + * \ + * Iterator class for class CLASSNAME##_CLIST \ + * \ + * Note: We don't need to coerce pointers to member functions input \ + * parameters as these are automatically converted to the type of the base \ + * type. ("A ptr to a class may be converted to a pointer to a public base \ + * class of that class") \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_C_IT : public CLIST_ITERATOR { \ + public: \ + CLASSNAME##_C_IT() : CLIST_ITERATOR() {} \ + \ + CLASSNAME##_C_IT(CLASSNAME##_CLIST *list) : CLIST_ITERATOR(list) {} \ + \ + CLASSNAME *data() { return (CLASSNAME *)CLIST_ITERATOR::data(); } \ + \ + CLASSNAME *data_relative(inT8 offset) { \ + return (CLASSNAME *)CLIST_ITERATOR::data_relative(offset); \ + } \ + \ + CLASSNAME *forward() { return (CLASSNAME *)CLIST_ITERATOR::forward(); } \ + \ + CLASSNAME *extract() { return (CLASSNAME *)CLIST_ITERATOR::extract(); } \ + \ + CLASSNAME *move_to_first() { \ + return (CLASSNAME *)CLIST_ITERATOR::move_to_first(); \ + } \ + \ + CLASSNAME *move_to_last() { \ + return (CLASSNAME *)CLIST_ITERATOR::move_to_last(); \ + } \ + }; + +#define CLISTIZEH(CLASSNAME) \ + \ + CLISTIZEH_A(CLASSNAME) \ + \ + CLISTIZEH_B(CLASSNAME) \ + \ + CLISTIZEH_C(CLASSNAME) /*********************************************************************** CLISTIZE( CLASSNAME ) MACRO ***********************************************************************/ -#define CLISTIZE( CLASSNAME ) \ - \ -/*********************************************************************** \ -* CLASSNAME##_c1_zapper \ -* \ -* A function which can delete a CLASSNAME element. 
This is passed to the \ -* generic deep_clear list member function so that when a list is cleared the \ -* elements on the list are properly destroyed from the base class, even \ -* though we don't use a virtual destructor function. \ -**********************************************************************/ \ - \ -DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ -void* link) /*link to delete*/ \ -{ \ -delete (CLASSNAME *) link; \ -} \ +#define CLISTIZE(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASSNAME##_c1_zapper \ + * \ + * A function which can delete a CLASSNAME element. This is passed to the \ + * generic deep_clear list member function so that when a list is cleared \ + *the \ + * elements on the list are properly destroyed from the base class, even \ + * though we don't use a virtual destructor function. \ + **********************************************************************/ \ + \ + DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ + void *link) /*link to delete*/ \ + { \ + delete (CLASSNAME *)link; \ + } #endif diff --git a/ccutil/elst.cpp b/ccutil/elst.cpp index 8ad999b5ba..2d2c9ad65d 100644 --- a/ccutil/elst.cpp +++ b/ccutil/elst.cpp @@ -26,7 +26,7 @@ **********************************************************************/ /*********************************************************************** - * ELIST::internal_clear + * ELIST::internal_clear * * Used by the destructor and the "clear" member function of derived list * classes to destroy all the elements on the list. @@ -57,7 +57,7 @@ void (*zapper) (ELIST_LINK *)) { } /*********************************************************************** - * ELIST::assign_to_sublist + * ELIST::assign_to_sublist * * The list is set to a sublist of another list. "This" list must be empty * before this function is invoked. 
The two iterators passed must refer to @@ -81,9 +81,8 @@ void ELIST::assign_to_sublist( //to this list last = start_it->extract_sublist (end_it); } - /*********************************************************************** - * ELIST::length + * ELIST::length * * Return count of elements on list **********************************************************************/ @@ -97,9 +96,8 @@ inT32 ELIST::length() const { // count elements return count; } - /*********************************************************************** - * ELIST::sort + * ELIST::sort * * Sort elements on list * NB If you don't like the const declarations in the comparator, coerce yours: @@ -187,7 +185,7 @@ ELIST_LINK *ELIST::add_sorted_and_find( **********************************************************************/ /*********************************************************************** - * ELIST_ITERATOR::forward + * ELIST_ITERATOR::forward * * Move the iterator to the next element of the list. * REMEMBER: ALL LISTS ARE CIRCULAR. @@ -224,9 +222,8 @@ ELIST_LINK *ELIST_ITERATOR::forward() { return current; } - /*********************************************************************** - * ELIST_ITERATOR::data_relative + * ELIST_ITERATOR::data_relative * * Return the data pointer to the element "offset" elements from current. * "offset" must not be less than -1. @@ -260,9 +257,8 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( //get data + or - ... return ptr; } - /*********************************************************************** - * ELIST_ITERATOR::move_to_last() + * ELIST_ITERATOR::move_to_last() * * Move current so that it is set to the end of the list. * Return data just in case anyone wants it. 
@@ -281,9 +277,8 @@ ELIST_LINK *ELIST_ITERATOR::move_to_last() { return current; } - /*********************************************************************** - * ELIST_ITERATOR::exchange() + * ELIST_ITERATOR::exchange() * * Given another iterator, whose current element is a different element on * the same list list OR an element of another list, exchange the two current @@ -379,9 +374,8 @@ void ELIST_ITERATOR::exchange( //positions of 2 link other_it->current = old_current; } - /*********************************************************************** - * ELIST_ITERATOR::extract_sublist() + * ELIST_ITERATOR::extract_sublist() * * This is a private member, used only by ELIST::assign_to_sublist. * Given another iterator for the same list, extract the links from THIS to @@ -425,7 +419,7 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( //from temp_it.mark_cycle_pt (); do { //walk sublist - if (temp_it.cycled_list ()) //can't find end pt + if (temp_it.cycled_list()) // can't find end pt BAD_SUBLIST.error ("ELIST_ITERATOR.extract_sublist", ABORT, NULL); if (temp_it.at_last ()) { diff --git a/ccutil/elst.h b/ccutil/elst.h index e239577419..d53a7c34f0 100644 --- a/ccutil/elst.h +++ b/ccutil/elst.h @@ -98,8 +98,8 @@ class DLLSYM ELIST_LINK next = NULL; } - void operator= ( //don't copy links - const ELIST_LINK &) { + void operator=( // don't copy links + const ELIST_LINK &) { next = NULL; } }; @@ -273,8 +273,8 @@ class DLLSYM ELIST_ITERATOR bool cycled_list(); //Completed a cycle? 
- void add_to_end( //add at end & - ELIST_LINK *new_link); //don't move + void add_to_end( // add at end & + ELIST_LINK *new_link); // don't move void exchange( //positions of 2 links ELIST_ITERATOR *other_it); //other iterator @@ -458,7 +458,6 @@ inline void ELIST_ITERATOR::add_before_then_move( // element to add current = new_element; } - /*********************************************************************** * ELIST_ITERATOR::add_before_stay_put * @@ -501,11 +500,11 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add } } - /*********************************************************************** * ELIST_ITERATOR::add_list_after * - * Insert another list to this list after the current element but don't move the + * Insert another list to this list after the current element but don't move + *the * iterator. **********************************************************************/ @@ -959,30 +958,29 @@ ELISTIZEH_C( CLASSNAME ) ELISTIZE( CLASSNAME ) MACRO ***********************************************************************/ -#define ELISTIZE(CLASSNAME) \ - \ -/*********************************************************************** \ -* CLASSNAME##_zapper \ -* \ -* A function which can delete a CLASSNAME element. This is passed to the \ -* generic clear list member function so that when a list is cleared the \ -* elements on the list are properly destroyed from the base class, even \ -* though we don't use a virtual destructor function. 
\ -**********************************************************************/ \ - \ -DLLSYM void CLASSNAME##_zapper(ELIST_LINK* link) { \ - delete reinterpret_cast<CLASSNAME*>(link); \ -} \ - \ -/* Become a deep copy of src_list*/ \ -void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST* src_list, \ - CLASSNAME* (*copier)(const CLASSNAME*)) { \ - \ - CLASSNAME##_IT from_it(const_cast<CLASSNAME##_LIST*>(src_list)); \ - CLASSNAME##_IT to_it(this); \ - \ - for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ - to_it.add_after_then_move((*copier)(from_it.data())); \ -} +#define ELISTIZE(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASSNAME##_zapper \ + * \ + * A function which can delete a CLASSNAME element. This is passed to the \ + * generic clear list member function so that when a list is cleared the \ + * elements on the list are properly destroyed from the base class, even \ + * though we don't use a virtual destructor function. \ + **********************************************************************/ \ + \ + DLLSYM void CLASSNAME##_zapper(ELIST_LINK *link) { \ + delete reinterpret_cast<CLASSNAME *>(link); \ + } \ + \ + /* Become a deep copy of src_list*/ \ + void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST *src_list, \ + CLASSNAME *(*copier)(const CLASSNAME *)) { \ + CLASSNAME##_IT from_it(const_cast<CLASSNAME##_LIST *>(src_list)); \ + CLASSNAME##_IT to_it(this); \ + \ + for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ + to_it.add_after_then_move((*copier)(from_it.data())); \ + } #endif diff --git a/ccutil/elst2.cpp b/ccutil/elst2.cpp index 30cedec17b..0d4960ed49 100644 --- a/ccutil/elst2.cpp +++ b/ccutil/elst2.cpp @@ -27,7 +27,7 @@ **********************************************************************/ /*********************************************************************** - * ELIST2::internal_clear + * ELIST2::internal_clear * * Used by the destructor and the "clear" member function of derived list * classes to
destroy all the elements on the list. @@ -58,7 +58,7 @@ void (*zapper) (ELIST2_LINK *)) { } /*********************************************************************** - * ELIST2::assign_to_sublist + * ELIST2::assign_to_sublist * * The list is set to a sublist of another list. "This" list must be empty * before this function is invoked. The two iterators passed must refer to @@ -82,9 +82,8 @@ void ELIST2::assign_to_sublist( //to this list last = start_it->extract_sublist (end_it); } - /*********************************************************************** - * ELIST2::length + * ELIST2::length * * Return count of elements on list **********************************************************************/ @@ -98,9 +97,8 @@ inT32 ELIST2::length() const { // count elements return count; } - /*********************************************************************** - * ELIST2::sort + * ELIST2::sort * * Sort elements on list * NB If you don't like the const declarations in the comparator, coerce yours: @@ -180,7 +178,7 @@ void ELIST2::add_sorted(int comparator(const void*, const void*), **********************************************************************/ /*********************************************************************** - * ELIST2_ITERATOR::forward + * ELIST2_ITERATOR::forward * * Move the iterator to the next element of the list. * REMEMBER: ALL LISTS ARE CIRCULAR. @@ -218,9 +216,8 @@ ELIST2_LINK *ELIST2_ITERATOR::forward() { return current; } - /*********************************************************************** - * ELIST2_ITERATOR::backward + * ELIST2_ITERATOR::backward * * Move the iterator to the previous element of the list. * REMEMBER: ALL LISTS ARE CIRCULAR. @@ -257,9 +254,8 @@ ELIST2_LINK *ELIST2_ITERATOR::backward() { return current; } - /*********************************************************************** - * ELIST2_ITERATOR::data_relative + * ELIST2_ITERATOR::data_relative * * Return the data pointer to the element "offset" elements from current. 
* (This function can't be INLINEd because it contains a loop) @@ -289,9 +285,8 @@ ELIST2_LINK *ELIST2_ITERATOR::data_relative( //get data + or - .. return ptr; } - /*********************************************************************** - * ELIST2_ITERATOR::exchange() + * ELIST2_ITERATOR::exchange() * * Given another iterator, whose current element is a different element on * the same list list OR an element of another list, exchange the two current @@ -399,9 +394,8 @@ void ELIST2_ITERATOR::exchange( //positions of 2 li other_it->current = old_current; } - /*********************************************************************** - * ELIST2_ITERATOR::extract_sublist() + * ELIST2_ITERATOR::extract_sublist() * * This is a private member, used only by ELIST2::assign_to_sublist. * Given another iterator for the same list, extract the links from THIS to @@ -445,7 +439,7 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( //fr temp_it.mark_cycle_pt (); do { //walk sublist - if (temp_it.cycled_list ()) //can't find end pt + if (temp_it.cycled_list()) // can't find end pt BAD_SUBLIST.error ("ELIST2_ITERATOR.extract_sublist", ABORT, NULL); if (temp_it.at_last ()) { diff --git a/ccutil/elst2.h b/ccutil/elst2.h index 364abd86bc..bf078fbd56 100644 --- a/ccutil/elst2.h +++ b/ccutil/elst2.h @@ -46,9 +46,9 @@ i) The duplication in source does not affect the run time code size - the **********************************************************************/ /********************************************************************** - * CLASS - ELIST2_LINK + * CLASS - ELIST2_LINK * - * Generic link class for doubly linked lists with embedded links + * Generic link class for doubly linked lists with embedded links * * Note: No destructor - elements are assumed to be destroyed EITHER after * they have been extracted from a list OR by the ELIST2 destructor which @@ -68,13 +68,13 @@ class DLLSYM ELIST2_LINK prev = next = NULL; } - ELIST2_LINK( //copy constructor - const ELIST2_LINK &) { //don't 
copy link + ELIST2_LINK( // copy constructor + const ELIST2_LINK &) { // don't copy link prev = next = NULL; } - void operator= ( //don't copy links - const ELIST2_LINK &) { + void operator=( // don't copy links + const ELIST2_LINK &) { prev = next = NULL; } }; @@ -142,9 +142,10 @@ class DLLSYM ELIST2 }; /*********************************************************************** - * CLASS - ELIST2_ITERATOR + * CLASS - ELIST2_ITERATOR * - * Generic iterator class for doubly linked lists with embedded links + * Generic iterator class for doubly linked lists with embedded + *links **********************************************************************/ class DLLSYM ELIST2_ITERATOR @@ -240,8 +241,8 @@ class DLLSYM ELIST2_ITERATOR BOOL8 cycled_list(); //Completed a cycle? - void add_to_end( //add at end & - ELIST2_LINK *new_link); //don't move + void add_to_end( // add at end & + ELIST2_LINK *new_link); // don't move void exchange( //positions of 2 links ELIST2_ITERATOR *other_it); //other iterator @@ -255,7 +256,7 @@ class DLLSYM ELIST2_ITERATOR }; /*********************************************************************** - * ELIST2_ITERATOR::set_to_list + * ELIST2_ITERATOR::set_to_list * * (Re-)initialise the iterator to point to the start of the list_to_iterate * over. 
@@ -279,9 +280,8 @@ inline void ELIST2_ITERATOR::set_to_list( //change list ex_current_was_cycle_pt = FALSE; } - /*********************************************************************** - * ELIST2_ITERATOR::ELIST2_ITERATOR + * ELIST2_ITERATOR::ELIST2_ITERATOR * * CONSTRUCTOR - set iterator to specified list; **********************************************************************/ @@ -290,9 +290,8 @@ inline ELIST2_ITERATOR::ELIST2_ITERATOR(ELIST2 *list_to_iterate) { set_to_list(list_to_iterate); } - /*********************************************************************** - * ELIST2_ITERATOR::add_after_then_move + * ELIST2_ITERATOR::add_after_then_move * * Add a new element to the list after the current element and move the * iterator to the new element. @@ -339,9 +338,8 @@ inline void ELIST2_ITERATOR::add_after_then_move( // element to add current = new_element; } - /*********************************************************************** - * ELIST2_ITERATOR::add_after_stay_put + * ELIST2_ITERATOR::add_after_stay_put * * Add a new element to the list after the current element but do not move * the iterator to the new element. @@ -391,9 +389,8 @@ inline void ELIST2_ITERATOR::add_after_stay_put( // element to add } } - /*********************************************************************** - * ELIST2_ITERATOR::add_before_then_move + * ELIST2_ITERATOR::add_before_then_move * * Add a new element to the list before the current element and move the * iterator to the new element. @@ -438,9 +435,8 @@ inline void ELIST2_ITERATOR::add_before_then_move( // element to add current = new_element; } - /*********************************************************************** - * ELIST2_ITERATOR::add_before_stay_put + * ELIST2_ITERATOR::add_before_stay_put * * Add a new element to the list before the current element but don't move the * iterator to the new element. 
@@ -486,11 +482,11 @@ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add } } - /*********************************************************************** - * ELIST2_ITERATOR::add_list_after + * ELIST2_ITERATOR::add_list_after * - * Insert another list to this list after the current element but don't move the + * Insert another list to this list after the current element but don't move + *the * iterator. **********************************************************************/ @@ -537,9 +533,8 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) { } } - /*********************************************************************** - * ELIST2_ITERATOR::add_list_before + * ELIST2_ITERATOR::add_list_before * * Insert another list to this list before the current element. Move the * iterator to the start of the inserted elements @@ -586,9 +581,8 @@ inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) { } } - /*********************************************************************** - * ELIST2_ITERATOR::extract + * ELIST2_ITERATOR::extract * * Do extraction by removing current from the list, returning it to the * caller, but NOT updating the iterator. (So that any calling loop can do @@ -631,9 +625,8 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() { return extracted_link; } - /*********************************************************************** - * ELIST2_ITERATOR::move_to_first() + * ELIST2_ITERATOR::move_to_first() * * Move current so that it is set to the start of the list. * Return data just in case anyone wants it. @@ -651,9 +644,8 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() { return current; } - /*********************************************************************** - * ELIST2_ITERATOR::move_to_last() + * ELIST2_ITERATOR::move_to_last() * * Move current so that it is set to the end of the list. * Return data just in case anyone wants it. 
@@ -671,9 +663,8 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() { return current; } - /*********************************************************************** - * ELIST2_ITERATOR::mark_cycle_pt() + * ELIST2_ITERATOR::mark_cycle_pt() * * Remember the current location so that we can tell whether we've returned * to this point later. @@ -696,9 +687,8 @@ inline void ELIST2_ITERATOR::mark_cycle_pt() { started_cycling = FALSE; } - /*********************************************************************** - * ELIST2_ITERATOR::at_first() + * ELIST2_ITERATOR::at_first() * * Are we at the start of the list? * @@ -716,9 +706,8 @@ inline BOOL8 ELIST2_ITERATOR::at_first() { !ex_current_was_last)); //first and last } - /*********************************************************************** - * ELIST2_ITERATOR::at_last() + * ELIST2_ITERATOR::at_last() * * Are we at the end of the list? * @@ -736,9 +725,8 @@ inline BOOL8 ELIST2_ITERATOR::at_last() { ex_current_was_last)); //first and last } - /*********************************************************************** - * ELIST2_ITERATOR::cycled_list() + * ELIST2_ITERATOR::cycled_list() * * Have we returned to the cycle_pt since it was set? * @@ -754,9 +742,8 @@ inline BOOL8 ELIST2_ITERATOR::cycled_list() { } - /*********************************************************************** - * ELIST2_ITERATOR::length() + * ELIST2_ITERATOR::length() * * Return the length of the list * @@ -771,9 +758,8 @@ inline inT32 ELIST2_ITERATOR::length() { return list->length (); } - /*********************************************************************** - * ELIST2_ITERATOR::sort() + * ELIST2_ITERATOR::sort() * * Sort the elements of the list, then reposition at the start. 
* @@ -792,9 +778,8 @@ const void *, const void *)) { move_to_first(); } - /*********************************************************************** - * ELIST2_ITERATOR::add_to_end + * ELIST2_ITERATOR::add_to_end * * Add a new element to the end of the list without moving the iterator. * This is provided because a single linked list cannot move to the last as @@ -854,7 +839,7 @@ will NOT work correctly for classes derived from this. The macro generates: - An element deletion function: CLASSNAME##_zapper - - An E_LIST2 subclass: CLASSNAME##_LIST + - An E_LIST2 subclass: CLASSNAME##_LIST - An E_LIST2_ITERATOR subclass: CLASSNAME##_IT @@ -873,132 +858,132 @@ ELIST2IZEH is a concatenation of 3 fragments ELIST2IZEH_A, ELIST2IZEH_B and ELIST2IZEH_C. ***********************************************************************/ -#define ELIST2IZEH_A( CLASSNAME ) \ - \ -extern DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ -ELIST2_LINK* link); /*link to delete*/ - -#define ELIST2IZEH_B( CLASSNAME ) \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_LIST \ -* \ -* List class for class CLASSNAME \ -* \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_LIST : public ELIST2 \ -{ \ -public: \ - CLASSNAME##_LIST():ELIST2() {} \ - /* constructor */ \ - \ - CLASSNAME##_LIST( /* don't construct */ \ - const CLASSNAME##_LIST&) /*by initial assign*/\ - { DONT_CONSTRUCT_LIST_BY_COPY.error( QUOTE_IT( CLASSNAME##_LIST ), \ - ABORT, NULL ); } \ - \ -void clear() /* delete elements */\ - { ELIST2::internal_clear( &CLASSNAME##_zapper ); } \ - \ - ~CLASSNAME##_LIST() /* destructor */ \ - { clear(); } \ -\ -/* Become a deep copy of src_list*/ \ -void deep_copy(const CLASSNAME##_LIST* src_list, \ - CLASSNAME* (*copier)(const CLASSNAME*)); \ -\ -void operator=( /* prevent assign */ \ - const CLASSNAME##_LIST&) \ - { DONT_ASSIGN_LISTS.error( QUOTE_IT( CLASSNAME##_LIST ), \ - ABORT, NULL ); 
} - -#define ELIST2IZEH_C( CLASSNAME ) \ -}; \ - \ - \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_IT \ -* \ -* Iterator class for class CLASSNAME##_LIST \ -* \ -* Note: We don't need to coerce pointers to member functions input \ -* parameters as these are automatically converted to the type of the base \ -* type. ("A ptr to a class may be converted to a pointer to a public base \ -* class of that class") \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_IT : public ELIST2_ITERATOR \ -{ \ -public: \ - CLASSNAME##_IT():ELIST2_ITERATOR(){} \ - \ - CLASSNAME##_IT( \ -CLASSNAME##_LIST* list):ELIST2_ITERATOR(list){} \ - \ - CLASSNAME* data() \ - { return (CLASSNAME*) ELIST2_ITERATOR::data(); } \ - \ - CLASSNAME* data_relative( \ - inT8 offset) \ - { return (CLASSNAME*) ELIST2_ITERATOR::data_relative( offset ); } \ - \ - CLASSNAME* forward() \ - { return (CLASSNAME*) ELIST2_ITERATOR::forward(); } \ - \ - CLASSNAME* backward() \ - { return (CLASSNAME*) ELIST2_ITERATOR::backward(); } \ - \ - CLASSNAME* extract() \ - { return (CLASSNAME*) ELIST2_ITERATOR::extract(); } \ - \ - CLASSNAME* move_to_first() \ - { return (CLASSNAME*) ELIST2_ITERATOR::move_to_first(); } \ - \ - CLASSNAME* move_to_last() \ - { return (CLASSNAME*) ELIST2_ITERATOR::move_to_last(); } \ -}; - -#define ELIST2IZEH( CLASSNAME ) \ - \ -ELIST2IZEH_A( CLASSNAME ) \ - \ -ELIST2IZEH_B( CLASSNAME ) \ - \ -ELIST2IZEH_C( CLASSNAME ) +#define ELIST2IZEH_A(CLASSNAME) \ + \ + extern DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ + ELIST2_LINK *link); /*link to delete*/ + +#define ELIST2IZEH_B(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASS - \ + *CLASSNAME##_LIST \ + * \ + * List class for class \ + *CLASSNAME \ + * \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_LIST : public 
ELIST2 { \ + public: \ + CLASSNAME##_LIST() : ELIST2() {} \ + /* constructor */ \ + \ + CLASSNAME##_LIST( /* don't construct */ \ + const CLASSNAME##_LIST &) /*by initial assign*/ \ + { \ + DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, \ + NULL); \ + } \ + \ + void clear() /* delete elements */ \ + { \ + ELIST2::internal_clear(&CLASSNAME##_zapper); \ + } \ + \ + ~CLASSNAME##_LIST() /* destructor */ \ + { \ + clear(); \ + } \ + \ + /* Become a deep copy of src_list*/ \ + void deep_copy(const CLASSNAME##_LIST *src_list, \ + CLASSNAME *(*copier)(const CLASSNAME *)); \ + \ + void operator=(/* prevent assign */ \ + const CLASSNAME##_LIST &) { \ + DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, NULL); \ + } +#define ELIST2IZEH_C(CLASSNAME) \ + } \ + ; \ + \ + /*********************************************************************** \ + * CLASS - CLASSNAME##_IT \ + * \ + * Iterator class for class CLASSNAME##_LIST \ + * \ + * Note: We don't need to coerce pointers to member functions input \ + * parameters as these are automatically converted to the type of the base \ + * type. 
("A ptr to a class may be converted to a pointer to a public base \ + * class of that class") \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_IT : public ELIST2_ITERATOR { \ + public: \ + CLASSNAME##_IT() : ELIST2_ITERATOR() {} \ + \ + CLASSNAME##_IT(CLASSNAME##_LIST *list) : ELIST2_ITERATOR(list) {} \ + \ + CLASSNAME *data() { return (CLASSNAME *)ELIST2_ITERATOR::data(); } \ + \ + CLASSNAME *data_relative(inT8 offset) { \ + return (CLASSNAME *)ELIST2_ITERATOR::data_relative(offset); \ + } \ + \ + CLASSNAME *forward() { return (CLASSNAME *)ELIST2_ITERATOR::forward(); } \ + \ + CLASSNAME *backward() { return (CLASSNAME *)ELIST2_ITERATOR::backward(); } \ + \ + CLASSNAME *extract() { return (CLASSNAME *)ELIST2_ITERATOR::extract(); } \ + \ + CLASSNAME *move_to_first() { \ + return (CLASSNAME *)ELIST2_ITERATOR::move_to_first(); \ + } \ + \ + CLASSNAME *move_to_last() { \ + return (CLASSNAME *)ELIST2_ITERATOR::move_to_last(); \ + } \ + }; + +#define ELIST2IZEH(CLASSNAME) \ + \ + ELIST2IZEH_A(CLASSNAME) \ + \ + ELIST2IZEH_B(CLASSNAME) \ + \ + ELIST2IZEH_C(CLASSNAME) /*********************************************************************** ELIST2IZE( CLASSNAME ) MACRO ***********************************************************************/ -#define ELIST2IZE( CLASSNAME ) \ - \ -/*********************************************************************** \ -* CLASSNAME##_zapper \ -* \ -* A function which can delete a CLASSNAME element. This is passed to the \ -* generic clear list member function so that when a list is cleared the \ -* elements on the list are properly destroyed from the base class, even \ -* though we don't use a virtual destructor function. 
\ -**********************************************************************/ \ - \ -DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ -ELIST2_LINK* link) /*link to delete*/ \ -{ \ -delete (CLASSNAME *) link; \ -} \ -\ -/* Become a deep copy of src_list*/ \ -void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST* src_list, \ - CLASSNAME* (*copier)(const CLASSNAME*)) { \ -\ - CLASSNAME##_IT from_it(const_cast<CLASSNAME##_LIST*>(src_list)); \ - CLASSNAME##_IT to_it(this); \ -\ - for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ - to_it.add_after_then_move((*copier)(from_it.data())); \ -} +#define ELIST2IZE(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASSNAME##_zapper \ + * \ + * A function which can delete a CLASSNAME element. This is passed to the \ + * generic clear list member function so that when a list is cleared the \ + * elements on the list are properly destroyed from the base class, even \ + * though we don't use a virtual destructor function. \ + **********************************************************************/ \ + \ + DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ + ELIST2_LINK *link) /*link to delete*/ \ + { \ + delete (CLASSNAME *)link; \ + } \ + \ + /* Become a deep copy of src_list*/ \ + void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST *src_list, \ + CLASSNAME *(*copier)(const CLASSNAME *)) { \ + CLASSNAME##_IT from_it(const_cast<CLASSNAME##_LIST *>(src_list)); \ + CLASSNAME##_IT to_it(this); \ + \ + for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ + to_it.add_after_then_move((*copier)(from_it.data())); \ + } #endif diff --git a/ccutil/errcode.h b/ccutil/errcode.h index d690240036..2f31a7b9ae 100644 --- a/ccutil/errcode.h +++ b/ccutil/errcode.h @@ -87,11 +87,10 @@ const ERRCODE ASSERT_FAILED = "Assert failed"; __FILE__, __LINE__); \ } -#define ASSERT_HOST_MSG(x, ...) 
if (!(x)) \ - { \ - tprintf(__VA_ARGS__); \ - ASSERT_FAILED.error(#x, ABORT, "in file %s, line %d", \ - __FILE__, __LINE__); \ +#define ASSERT_HOST_MSG(x, ...) \ + if (!(x)) { \ + tprintf(__VA_ARGS__); \ + ASSERT_FAILED.error(#x, ABORT, "in file %s, line %d", __FILE__, __LINE__); \ } void signal_exit(int signal_code); diff --git a/ccutil/lsterr.h b/ccutil/lsterr.h index 42ed07e326..82497ae80d 100644 --- a/ccutil/lsterr.h +++ b/ccutil/lsterr.h @@ -38,6 +38,6 @@ const ERRCODE NULL_PREV = "Previous element on the list is NULL"; const ERRCODE EMPTY_LIST = "List is empty"; const ERRCODE BAD_PARAMETER = "List parameter error"; const ERRCODE STILL_LINKED = -"Attempting to add an element with non NULL links, to a list"; + "Attempting to add an element with non NULL links, to a list"; #endif #endif diff --git a/ccutil/mainblk.cpp b/ccutil/mainblk.cpp index aa73c55002..36becce9ec 100644 --- a/ccutil/mainblk.cpp +++ b/ccutil/mainblk.cpp @@ -74,7 +74,7 @@ void CCUtil::main_setup(const char *argv0, const char *basename) { #endif /* _WIN32 */ #if defined(TESSDATA_PREFIX) } else { - /* Use tessdata prefix which was compiled in. */ +/* Use tessdata prefix which was compiled in. */ #define _STR(a) #a #define _XSTR(a) _STR(a) datadir = _XSTR(TESSDATA_PREFIX); diff --git a/ccutil/ocrclass.h b/ccutil/ocrclass.h index 3175a6d29a..f352956fa0 100644 --- a/ccutil/ocrclass.h +++ b/ccutil/ocrclass.h @@ -1,7 +1,7 @@ /********************************************************************** * File: ocrclass.h * Description: Class definitions and constants for the OCR API. - * Author: Hewlett-Packard Co + * Author: Hewlett-Packard Co * * (C) Copyright 1996, Hewlett-Packard Co. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -110,28 +110,35 @@ typedef struct { /*single character */ * user words found. If it returns true then operation is cancelled. 
**********************************************************************/ typedef bool (*CANCEL_FUNC)(void* cancel_this, int words); -typedef bool (*PROGRESS_FUNC)(int progress, - int left, int right, int top, int bottom); +typedef bool (*PROGRESS_FUNC)(int progress, int left, int right, int top, + int bottom); class ETEXT_DESC { // output header public: - inT16 count; /// chars in this buffer(0) - inT16 progress; /// percent complete increasing (0-100) + inT16 count; /// chars in this buffer(0) + inT16 progress; /// percent complete increasing (0-100) /** Progress monitor covers word recognition and it does not cover layout * analysis. * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */ - inT8 more_to_come; /// true if not last - volatile inT8 ocr_alive; /// ocr sets to 1, HP 0 - inT8 err_code; /// for errcode use - CANCEL_FUNC cancel; /// returns true to cancel - PROGRESS_FUNC progress_callback; /// called whenever progress increases - void* cancel_this; /// this or other data for cancel - struct timeval end_time; /** time to stop. expected to be set only by call - * to set_deadline_msecs() */ - EANYCODE_CHAR text[1]; /// character data + inT8 more_to_come; /// true if not last + volatile inT8 ocr_alive; /// ocr sets to 1, HP 0 + inT8 err_code; /// for errcode use + CANCEL_FUNC cancel; /// returns true to cancel + PROGRESS_FUNC progress_callback; /// called whenever progress increases + void* cancel_this; /// this or other data for cancel + struct timeval end_time; /// Time to stop. Expected to be set only + /// by call to set_deadline_msecs(). 
+ EANYCODE_CHAR text[1]; /// character data - ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0), - err_code(0), cancel(NULL), cancel_this(NULL) { + ETEXT_DESC() + : count(0), + progress(0), + more_to_come(0), + ocr_alive(0), + err_code(0), + cancel(NULL), + progress_callback(NULL), + cancel_this(NULL) { end_time.tv_sec = 0; end_time.tv_usec = 0; } diff --git a/ccutil/params.cpp b/ccutil/params.cpp index 9b604fbef4..c8dd3514e5 100644 --- a/ccutil/params.cpp +++ b/ccutil/params.cpp @@ -31,8 +31,7 @@ #define EQUAL '=' tesseract::ParamsVectors *GlobalParams() { - static tesseract::ParamsVectors global_params = - tesseract::ParamsVectors(); + static tesseract::ParamsVectors global_params = tesseract::ParamsVectors(); return &global_params; } diff --git a/ccutil/strngs.cpp b/ccutil/strngs.cpp index ff3bbac287..0760852e90 100644 --- a/ccutil/strngs.cpp +++ b/ccutil/strngs.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: strngs.c (Formerly strings.c) * Description: STRING class functions. - * Author: Ray Smith - * Created: Fri Feb 15 09:13:30 GMT 1991 + * Author: Ray Smith + * Created: Fri Feb 15 09:13:30 GMT 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccutil/tessdatamanager.h b/ccutil/tessdatamanager.h index fd2685a1d8..e583b70049 100644 --- a/ccutil/tessdatamanager.h +++ b/ccutil/tessdatamanager.h @@ -76,24 +76,24 @@ enum TessdataType { * kTessdataFileSuffixes[i] indicates the file suffix for * tessdata of type i (from TessdataType enum). 
*/ -static const char * const kTessdataFileSuffixes[] = { - kLangConfigFileSuffix, // 0 - kUnicharsetFileSuffix, // 1 - kAmbigsFileSuffix, // 2 - kBuiltInTemplatesFileSuffix, // 3 - kBuiltInCutoffsFileSuffix, // 4 - kNormProtoFileSuffix, // 5 - kPuncDawgFileSuffix, // 6 - kSystemDawgFileSuffix, // 7 - kNumberDawgFileSuffix, // 8 - kFreqDawgFileSuffix, // 9 - kFixedLengthDawgsFileSuffix, // 10 // deprecated - kCubeUnicharsetFileSuffix, // 11 - kCubeSystemDawgFileSuffix, // 12 - kShapeTableFileSuffix, // 13 - kBigramDawgFileSuffix, // 14 - kUnambigDawgFileSuffix, // 15 - kParamsModelFileSuffix, // 16 +static const char *const kTessdataFileSuffixes[] = { + kLangConfigFileSuffix, // 0 + kUnicharsetFileSuffix, // 1 + kAmbigsFileSuffix, // 2 + kBuiltInTemplatesFileSuffix, // 3 + kBuiltInCutoffsFileSuffix, // 4 + kNormProtoFileSuffix, // 5 + kPuncDawgFileSuffix, // 6 + kSystemDawgFileSuffix, // 7 + kNumberDawgFileSuffix, // 8 + kFreqDawgFileSuffix, // 9 + kFixedLengthDawgsFileSuffix, // 10 // deprecated + kCubeUnicharsetFileSuffix, // 11 + kCubeSystemDawgFileSuffix, // 12 + kShapeTableFileSuffix, // 13 + kBigramDawgFileSuffix, // 14 + kUnambigDawgFileSuffix, // 15 + kParamsModelFileSuffix, // 16 }; /** @@ -101,23 +101,23 @@ static const char * const kTessdataFileSuffixes[] = { * of type i (from TessdataType enum) is text, and is binary otherwise. 
*/ static const bool kTessdataFileIsText[] = { - true, // 0 - true, // 1 - true, // 2 - false, // 3 - true, // 4 - true, // 5 - false, // 6 - false, // 7 - false, // 8 - false, // 9 - false, // 10 // deprecated - true, // 11 - false, // 12 - false, // 13 - false, // 14 - false, // 15 - true, // 16 + true, // 0 + true, // 1 + true, // 2 + false, // 3 + true, // 4 + true, // 5 + false, // 6 + false, // 7 + false, // 8 + false, // 9 + false, // 10 // deprecated + true, // 11 + false, // 12 + false, // 13 + false, // 14 + false, // 15 + true, // 16 }; /** diff --git a/ccutil/unicharset.h b/ccutil/unicharset.h index 684655affb..023e84d5b6 100644 --- a/ccutil/unicharset.h +++ b/ccutil/unicharset.h @@ -181,8 +181,7 @@ class UNICHARSET { // Return the UNICHAR_ID of a given unichar representation within the // UNICHARSET. Only the first length characters from unichar_repr are used. - UNICHAR_ID unichar_to_id(const char* const unichar_repr, - int length) const; + UNICHAR_ID unichar_to_id(const char* const unichar_repr, int length) const; // Return the minimum number of bytes that matches a legal UNICHAR_ID, // while leaving the rest of the string encodable. 
Returns 0 if the diff --git a/classify/classify.cpp b/classify/classify.cpp index 436efd1f2d..7c11c51f6e 100644 --- a/classify/classify.cpp +++ b/classify/classify.cpp @@ -151,8 +151,8 @@ Classify::Classify() INT_MEMBER(classify_integer_matcher_multiplier, 10, "Integer Matcher Multiplier 0-255: ", this->params()), EnableLearning(true), - INT_MEMBER(il1_adaption_test, 0, "Don't adapt to i/I at beginning of word", - this->params()), + INT_MEMBER(il1_adaption_test, 0, + "Don't adapt to i/I at beginning of word", this->params()), BOOL_MEMBER(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].", this->params()), double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", diff --git a/classify/cluster.cpp b/classify/cluster.cpp index b723bfa82e..1f82349552 100644 --- a/classify/cluster.cpp +++ b/classify/cluster.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: cluster.c - ** Purpose: Routines for clustering points in N-D space - ** Author: Dan Johnson - ** History: 5/29/89, DSJ, Created. + ** Filename: cluster.c + ** Purpose: Routines for clustering points in N-D space + ** Author: Dan Johnson + ** History: 5/29/89, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -390,11 +390,11 @@ double InvertMatrix(const float* input, int size, float* inv); * This routine creates a new clusterer data structure, * initializes it, and returns a pointer to it. * - * @param SampleSize number of dimensions in feature space - * @param ParamDesc description of each dimension - * @return pointer to the new clusterer data structure - * @note Exceptions: None - * @note History: 5/29/89, DSJ, Created. 
+ * @param SampleSize number of dimensions in feature space + * @param ParamDesc description of each dimension + * @return pointer to the new clusterer data structure + * @note Exceptions: None + * @note History: 5/29/89, DSJ, Created. */ CLUSTERER * MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) { @@ -437,7 +437,6 @@ MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) { return Clusterer; } // MakeClusterer - /** * This routine creates a new sample data structure to hold * the specified feature. This sample is added to the clusterer @@ -445,14 +444,14 @@ MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) { * clustered later), and a pointer to the sample is returned to * the caller. * - * @param Clusterer clusterer data structure to add sample to - * @param Feature feature to be added to clusterer - * @param CharID unique ident. of char that sample came from + * @param Clusterer clusterer data structure to add sample to + * @param Feature feature to be added to clusterer + * @param CharID unique ident. of char that sample came from * - * @return Pointer to the new sample data structure - * @note Exceptions: ALREADYCLUSTERED MakeSample can't be called after + * @return Pointer to the new sample data structure + * @note Exceptions: ALREADYCLUSTERED MakeSample can't be called after * ClusterSamples has been called - * @note History: 5/29/89, DSJ, Created. + * @note History: 5/29/89, DSJ, Created. */ SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, inT32 CharID) { @@ -490,7 +489,6 @@ SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, return (Sample); } // MakeSample - /** * This routine first checks to see if the samples in this * clusterer have already been clustered before; if so, it does @@ -505,12 +503,12 @@ SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, * list of prototypes that best represent the samples given * the constraints specified in Config. 
* - * @param Clusterer data struct containing samples to be clustered - * @param Config parameters which control clustering process + * @param Clusterer data struct containing samples to be clustered + * @param Config parameters which control clustering process * * @return Pointer to a list of prototypes - * @note Exceptions: None - * @note History: 5/29/89, DSJ, Created. + * @note Exceptions: None + * @note History: 5/29/89, DSJ, Created. */ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { //only create cluster tree if samples have never been clustered before @@ -523,10 +521,16 @@ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { //compute prototypes starting at the root node in the tree ComputePrototypes(Clusterer, Config); - return (Clusterer->ProtoList); + // We don't need the cluster pointers in the protos any more, so null them + // out, which makes it safe to delete the clusterer. + LIST proto_list = Clusterer->ProtoList; + iterate(proto_list) { + PROTOTYPE *proto = reinterpret_cast<PROTOTYPE *>(first_node(proto_list)); + proto->Cluster = NULL; + } + return Clusterer->ProtoList; } // ClusterSamples - /** * This routine frees all of the memory allocated to the * specified data structure. It will not, however, free @@ -535,10 +539,10 @@ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { * to NULL to indicate that the cluster data structures no * longer exist. Any sample lists that have been obtained * via calls to GetSamples are no longer valid. - * @param Clusterer pointer to data structure to be freed + * @param Clusterer pointer to data structure to be freed * @return None - * @note Exceptions: None - * @note History: 6/6/89, DSJ, Created. + * @note Exceptions: None + * @note History: 6/6/89, DSJ, Created. 
*/ void FreeClusterer(CLUSTERER *Clusterer) { if (Clusterer != NULL) { @@ -558,21 +562,19 @@ void FreeClusterer(CLUSTERER *Clusterer) { } } // FreeClusterer - /** * This routine frees all of the memory allocated to the * specified list of prototypes. The clusters which are * pointed to by the prototypes are not freed. - * @param ProtoList pointer to list of prototypes to be freed + * @param ProtoList pointer to list of prototypes to be freed * @return None - * @note Exceptions: None - * @note History: 6/6/89, DSJ, Created. + * @note Exceptions: None + * @note History: 6/6/89, DSJ, Created. */ void FreeProtoList(LIST *ProtoList) { destroy_nodes(*ProtoList, FreePrototype); } // FreeProtoList - /** * This routine deallocates the memory consumed by the specified * prototype and modifies the corresponding cluster so that it @@ -606,7 +608,6 @@ void FreePrototype(void *arg) { //PROTOTYPE *Prototype) memfree(Prototype); } // FreePrototype - /** * This routine is used to find all of the samples which * belong to a cluster. It starts by removing the top @@ -617,10 +618,10 @@ void FreePrototype(void *arg) { //PROTOTYPE *Prototype) * If all samples have been found, NULL is returned. * InitSampleSearch() must be called * before NextSample() to initialize the search. - * @param SearchState ptr to list containing clusters to be searched - * @return Pointer to the next leaf cluster (sample) or NULL. - * @note Exceptions: None - * @note History: 6/16/89, DSJ, Created. + * @param SearchState ptr to list containing clusters to be searched + * @return Pointer to the next leaf cluster (sample) or NULL. + * @note Exceptions: None + * @note History: 6/16/89, DSJ, Created. */ CLUSTER *NextSample(LIST *SearchState) { CLUSTER *Cluster; @@ -637,29 +638,27 @@ CLUSTER *NextSample(LIST *SearchState) { } } // NextSample - /** * This routine returns the mean of the specified * prototype in the indicated dimension. 
- * @param Proto prototype to return mean of - * @param Dimension dimension whose mean is to be returned - * @return Mean of Prototype in Dimension + * @param Proto prototype to return mean of + * @param Dimension dimension whose mean is to be returned + * @return Mean of Prototype in Dimension * @note Exceptions: none - * @note History: 7/6/89, DSJ, Created. + * @note History: 7/6/89, DSJ, Created. */ FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension) { return (Proto->Mean[Dimension]); } // Mean - /** * This routine returns the standard deviation of the * prototype in the indicated dimension. - * @param Proto prototype to return standard deviation of - * @param Dimension dimension whose stddev is to be returned - * @return Standard deviation of Prototype in Dimension + * @param Proto prototype to return standard deviation of + * @param Dimension dimension whose stddev is to be returned + * @return Standard deviation of Prototype in Dimension * @note Exceptions: none - * @note History: 7/6/89, DSJ, Created. + * @note History: 7/6/89, DSJ, Created. */ FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension) { switch (Proto->Style) { @@ -697,10 +696,10 @@ FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension) { * tree are the individual samples themselves; they have no * sub-clusters. The root node of the tree conceptually contains * all of the samples. - * @param Clusterer data structure holdings samples to be clustered - * @return None (the Clusterer data structure is changed) - * @note Exceptions: None - * @note History: 5/29/89, DSJ, Created. + * @param Clusterer data structure holdings samples to be clustered + * @return None (the Clusterer data structure is changed) + * @note Exceptions: None + * @note History: 5/29/89, DSJ, Created. 
*/ void CreateClusterTree(CLUSTERER *Clusterer) { ClusteringContext context; @@ -760,7 +759,6 @@ void CreateClusterTree(CLUSTERER *Clusterer) { memfree(context.candidates); } // CreateClusterTree - /** * This routine is designed to be used in concert with the * KDWalk routine. It will create a potential cluster for @@ -786,7 +784,6 @@ void MakePotentialClusters(ClusteringContext *context, } } // MakePotentialClusters - /** * This routine searches the specified kd-tree for the nearest * neighbor of the specified cluster. It actually uses the @@ -795,12 +792,12 @@ void MakePotentialClusters(ClusteringContext *context, * neighbor is returned, if it can be found, otherwise NULL is * returned. The distance between the 2 nodes is placed * in the specified variable. - * @param Tree kd-tree to search in for nearest neighbor - * @param Cluster cluster whose nearest neighbor is to be found - * @param Distance ptr to variable to report distance found - * @return Pointer to the nearest neighbor of Cluster, or NULL + * @param Tree kd-tree to search in for nearest neighbor + * @param Cluster cluster whose nearest neighbor is to be found + * @param Distance ptr to variable to report distance found + * @return Pointer to the nearest neighbor of Cluster, or NULL * @note Exceptions: none - * @note History: 5/29/89, DSJ, Created. + * @note History: 5/29/89, DSJ, Created. * 7/13/89, DSJ, Removed visibility of kd-tree node data struct */ CLUSTER * @@ -830,17 +827,16 @@ FindNearestNeighbor(KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance) return BestNeighbor; } // FindNearestNeighbor - /** * This routine creates a new permanent cluster from the * clusters specified in TempCluster. The 2 clusters in * TempCluster are marked as "clustered" and deleted from * the kd-tree. The new cluster is then added to the kd-tree. 
- * @param Clusterer current clustering environment - * @param TempCluster potential cluster to make permanent + * @param Clusterer current clustering environment + * @param TempCluster potential cluster to make permanent * @return Pointer to the new permanent cluster - * @note Exceptions: none - * @note History: 5/29/89, DSJ, Created. + * @note Exceptions: none + * @note History: 5/29/89, DSJ, Created. * 7/13/89, DSJ, Removed visibility of kd-tree node data struct */ CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) { @@ -872,21 +868,20 @@ CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) { return Cluster; } // MakeNewCluster - /** * This routine merges two clusters into one larger cluster. * To do this it computes the number of samples in the new * cluster and the mean of the new cluster. The ParamDesc * information is used to ensure that circular dimensions * are handled correctly. - * @param N # of dimensions (size of arrays) - * @param ParamDesc array of dimension descriptions - * @param n1, n2 number of samples in each old cluster - * @param m array to hold mean of new cluster - * @param m1, m2 arrays containing means of old clusters - * @return The number of samples in the new cluster. - * @note Exceptions: None - * @note History: 5/31/89, DSJ, Created. + * @param N # of dimensions (size of arrays) + * @param ParamDesc array of dimension descriptions + * @param n1, n2 number of samples in each old cluster + * @param m array to hold mean of new cluster + * @param m1, m2 arrays containing means of old clusters + * @return The number of samples in the new cluster. + * @note Exceptions: None + * @note History: 5/31/89, DSJ, Created. 
*/ inT32 MergeClusters(inT16 N, PARAM_DESC ParamDesc[], @@ -921,17 +916,16 @@ inT32 MergeClusters(inT16 N, return n; } // MergeClusters - /** * This routine decides which clusters in the cluster tree * should be represented by prototypes, forms a list of these * prototypes, and places the list in the Clusterer data * structure. - * @param Clusterer data structure holding cluster tree - * @param Config parameters used to control prototype generation - * @return None - * @note Exceptions: None - * @note History: 5/30/89, DSJ, Created. + * @param Clusterer data structure holding cluster tree + * @param Config parameters used to control prototype generation + * @return None + * @note Exceptions: None + * @note History: 5/30/89, DSJ, Created. */ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { LIST ClusterStack = NIL_LIST; @@ -961,8 +955,7 @@ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { } } // ComputePrototypes - -/** +/** * This routine attempts to create a prototype from the * specified cluster that conforms to the distribution * specified in Config. If there are too few samples in the @@ -972,12 +965,12 @@ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { * is generated and NULL is returned. If a prototype can be * found that matches the desired distribution then a pointer * to it is returned, otherwise NULL is returned. - * @param Clusterer data structure holding cluster tree - * @param Config parameters used to control prototype generation - * @param Cluster cluster to be made into a prototype - * @return Pointer to new prototype or NULL - * @note Exceptions: None - * @note History: 6/19/89, DSJ, Created. + * @param Clusterer data structure holding cluster tree + * @param Config parameters used to control prototype generation + * @param Cluster cluster to be made into a prototype + * @return Pointer to new prototype or NULL + * @note Exceptions: None + * @note History: 6/19/89, DSJ, Created. 
*/ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, CLUSTERCONFIG *Config, @@ -1050,7 +1043,6 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, return Proto; } // MakePrototype - /** * This routine checks for clusters which are degenerate and * therefore cannot be analyzed in a statistically valid way. @@ -1063,14 +1055,14 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, * * If the cluster is not degenerate, NULL is returned. * - * @param N number of dimensions - * @param Cluster cluster being analyzed - * @param Statistics statistical info about cluster - * @param Style type of prototype to be generated - * @param MinSamples minimum number of samples in a cluster - * @return Pointer to degenerate prototype or NULL. - * @note Exceptions: None - * @note History: 6/20/89, DSJ, Created. + * @param N number of dimensions + * @param Cluster cluster being analyzed + * @param Statistics statistical info about cluster + * @param Style type of prototype to be generated + * @param MinSamples minimum number of samples in a cluster + * @return Pointer to degenerate prototype or NULL. + * @note Exceptions: None + * @note History: 6/20/89, DSJ, Created. * 7/12/89, DSJ, Changed name and added check for 0 stddev. * 8/8/89, DSJ, Removed check for 0 stddev (handled elsewhere). */ @@ -1110,10 +1102,10 @@ PROTOTYPE *MakeDegenerateProto( //this was MinSample * be split. If not, then a new prototype is formed and * returned to the caller. If there is, then NULL is returned * to the caller. 
- * @param Clusterer data struct containing samples being clustered + * @param Clusterer data struct containing samples being clustered * @param Config provides the magic number of samples that make a good cluster - * @param Cluster cluster to be made into an elliptical prototype - * @param Statistics statistical info about cluster + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about cluster * @return Pointer to new elliptical prototype or NULL. */ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, @@ -1215,13 +1207,13 @@ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, * be approximated by a spherical normal distribution. If it * can be, then a new prototype is formed and returned to the * caller. If it can't be, then NULL is returned to the caller. - * @param Clusterer data struct containing samples being clustered - * @param Cluster cluster to be made into a spherical prototype - * @param Statistics statistical info about cluster - * @param Buckets histogram struct used to analyze distribution - * @return Pointer to new spherical prototype or NULL. - * @note Exceptions: None - * @note History: 6/1/89, DSJ, Created. + * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into a spherical prototype + * @param Statistics statistical info about cluster + * @param Buckets histogram struct used to analyze distribution + * @return Pointer to new spherical prototype or NULL. + * @note Exceptions: None + * @note History: 6/1/89, DSJ, Created. */ PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, @@ -1247,19 +1239,18 @@ PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer, return (Proto); } // MakeSphericalProto - /** * This routine tests the specified cluster to see if it can * be approximated by an elliptical normal distribution. If it * can be, then a new prototype is formed and returned to the * caller. 
If it can't be, then NULL is returned to the caller. - * @param Clusterer data struct containing samples being clustered - * @param Cluster cluster to be made into an elliptical prototype - * @param Statistics statistical info about cluster - * @param Buckets histogram struct used to analyze distribution - * @return Pointer to new elliptical prototype or NULL. - * @note Exceptions: None - * @note History: 6/12/89, DSJ, Created. + * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about cluster + * @param Buckets histogram struct used to analyze distribution + * @return Pointer to new elliptical prototype or NULL. + * @note Exceptions: None + * @note History: 6/12/89, DSJ, Created. */ PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, @@ -1286,7 +1277,6 @@ PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, return (Proto); } // MakeEllipticalProto - /** * This routine tests each dimension of the specified cluster to * see what distribution would best approximate that dimension. @@ -1295,14 +1285,14 @@ PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, * be represented by one of these distributions, * then a new prototype is formed and returned to the * caller. If it can't be, then NULL is returned to the caller. - * @param Clusterer data struct containing samples being clustered - * @param Cluster cluster to be made into a prototype - * @param Statistics statistical info about cluster - * @param NormalBuckets histogram struct used to analyze distribution - * @param Confidence confidence level for alternate distributions - * @return Pointer to new mixed prototype or NULL. - * @note Exceptions: None - * @note History: 6/12/89, DSJ, Created. 
+ * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into a prototype + * @param Statistics statistical info about cluster + * @param NormalBuckets histogram struct used to analyze distribution + * @param Confidence confidence level for alternate distributions + * @return Pointer to new mixed prototype or NULL. + * @note Exceptions: None + * @note History: 6/12/89, DSJ, Created. */ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, CLUSTER *Cluster, @@ -1355,16 +1345,15 @@ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, return (Proto); } // MakeMixedProto - /** * This routine alters the ith dimension of the specified * mixed prototype to be D_random. - * @param i index of dimension to be changed - * @param Proto prototype whose dimension is to be altered - * @param ParamDesc description of specified dimension - * @return None - * @note Exceptions: None - * @note History: 6/20/89, DSJ, Created. + * @param i index of dimension to be changed + * @param Proto prototype whose dimension is to be altered + * @param ParamDesc description of specified dimension + * @return None + * @note Exceptions: None + * @note History: 6/20/89, DSJ, Created. */ void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) { Proto->Distrib[i] = D_random; @@ -1380,16 +1369,15 @@ void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) { // note that the proto Weight is irrelevant for D_random protos } // MakeDimRandom - /** * This routine alters the ith dimension of the specified * mixed prototype to be uniform. - * @param i index of dimension to be changed - * @param Proto prototype whose dimension is to be altered - * @param Statistics statistical info about prototype - * @return None - * @note Exceptions: None - * @note History: 6/20/89, DSJ, Created. 
+ * @param i index of dimension to be changed + * @param Proto prototype whose dimension is to be altered + * @param Statistics statistical info about prototype + * @return None + * @note Exceptions: None + * @note History: 6/20/89, DSJ, Created. */ void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) { Proto->Distrib[i] = uniform; @@ -1410,7 +1398,6 @@ void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) { // note that the proto Weight is irrelevant for uniform protos } // MakeDimUniform - /** * This routine searches the cluster tree for all leaf nodes * which are samples in the specified cluster. It computes @@ -1420,12 +1407,12 @@ void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) { * return this information to the caller. An incremental * algorithm for computing statistics is not used because * it will not work with circular dimensions. - * @param N number of dimensions - * @param ParamDesc array of dimension descriptions - * @param Cluster cluster whose stats are to be computed - * @return Pointer to new data structure containing statistics - * @note Exceptions: None - * @note History: 6/2/89, DSJ, Created. + * @param N number of dimensions + * @param ParamDesc array of dimension descriptions + * @param Cluster cluster whose stats are to be computed + * @return Pointer to new data structure containing statistics + * @note Exceptions: None + * @note History: 6/2/89, DSJ, Created. */ STATISTICS * ComputeStatistics (inT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { @@ -1502,19 +1489,18 @@ ComputeStatistics (inT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { return (Statistics); } // ComputeStatistics - /** * This routine creates a spherical prototype data structure to * approximate the samples in the specified cluster. * Spherical prototypes have a single variance which is * common across all dimensions. All dimensions are normally * distributed and independent. 
- * @param N number of dimensions - * @param Cluster cluster to be made into a spherical prototype - * @param Statistics statistical info about samples in cluster - * @return Pointer to a new spherical prototype data structure - * @note Exceptions: None - * @note History: 6/19/89, DSJ, Created. + * @param N number of dimensions + * @param Cluster cluster to be made into a spherical prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new spherical prototype data structure + * @note Exceptions: None + * @note History: 6/19/89, DSJ, Created. */ PROTOTYPE *NewSphericalProto(uinT16 N, CLUSTER *Cluster, @@ -1537,18 +1523,17 @@ PROTOTYPE *NewSphericalProto(uinT16 N, return (Proto); } // NewSphericalProto - /** * This routine creates an elliptical prototype data structure to * approximate the samples in the specified cluster. * Elliptical prototypes have a variance for each dimension. * All dimensions are normally distributed and independent. - * @param N number of dimensions - * @param Cluster cluster to be made into an elliptical prototype - * @param Statistics statistical info about samples in cluster - * @return Pointer to a new elliptical prototype data structure - * @note Exceptions: None - * @note History: 6/19/89, DSJ, Created. + * @param N number of dimensions + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new elliptical prototype data structure + * @note Exceptions: None + * @note History: 6/19/89, DSJ, Created. */ PROTOTYPE *NewEllipticalProto(inT16 N, CLUSTER *Cluster, @@ -1579,7 +1564,6 @@ PROTOTYPE *NewEllipticalProto(inT16 N, return (Proto); } // NewEllipticalProto - /** * This routine creates a mixed prototype data structure to * approximate the samples in the specified cluster. 
@@ -1588,12 +1572,12 @@ PROTOTYPE *NewEllipticalProto(inT16 N, * structure is initially filled in as though it were an * elliptical prototype. The actual distributions of the * dimensions can be altered by other routines. - * @param N number of dimensions - * @param Cluster cluster to be made into a mixed prototype - * @param Statistics statistical info about samples in cluster - * @return Pointer to a new mixed prototype data structure - * @note Exceptions: None - * @note History: 6/19/89, DSJ, Created. + * @param N number of dimensions + * @param Cluster cluster to be made into a mixed prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new mixed prototype data structure + * @note Exceptions: None + * @note History: 6/19/89, DSJ, Created. */ PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics) { PROTOTYPE *Proto; @@ -1609,16 +1593,15 @@ PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics) { return (Proto); } // NewMixedProto - /** * This routine allocates memory to hold a simple prototype * data structure, i.e. one without independent distributions * and variances for each dimension. - * @param N number of dimensions - * @param Cluster cluster to be made into a prototype - * @return Pointer to new simple prototype - * @note Exceptions: None - * @note History: 6/19/89, DSJ, Created. + * @param N number of dimensions + * @param Cluster cluster to be made into a prototype + * @return Pointer to new simple prototype + * @note Exceptions: None + * @note History: 6/19/89, DSJ, Created. 
*/ PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) { PROTOTYPE *Proto; @@ -1640,7 +1623,6 @@ PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) { return (Proto); } // NewSimpleProto - /** * This routine returns TRUE if the specified covariance * matrix indicates that all N dimensions are independent of @@ -1653,13 +1635,13 @@ PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) { * coeff[ij] = stddev[ij] / sqrt (stddev[ii] * stddev[jj]) * The covariance matrix is assumed to be symmetric (which * should always be true). - * @param ParamDesc descriptions of each feature space dimension - * @param N number of dimensions - * @param CoVariance ptr to a covariance matrix - * @param Independence max off-diagonal correlation coefficient - * @return TRUE if dimensions are independent, FALSE otherwise - * @note Exceptions: None - * @note History: 6/4/89, DSJ, Created. + * @param ParamDesc descriptions of each feature space dimension + * @param N number of dimensions + * @param CoVariance ptr to a covariance matrix + * @param Independence max off-diagonal correlation coefficient + * @return TRUE if dimensions are independent, FALSE otherwise + * @note Exceptions: None + * @note History: 6/4/89, DSJ, Created. */ BOOL8 Independent (PARAM_DESC ParamDesc[], @@ -1692,7 +1674,6 @@ inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence) { return (TRUE); } // Independent - /** * This routine returns a histogram data structure which can * be used by other routines to place samples into histogram @@ -1703,12 +1684,12 @@ inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence) { * created so that it minimizes the computation time needed * to create a new bucket. * @param clusterer which keeps a bucket_cache for us. 
- * @param Distribution type of probability distribution to test for - * @param SampleCount number of samples that are available - * @param Confidence probability of a Type I error - * @return Bucket data structure + * @param Distribution type of probability distribution to test for + * @param SampleCount number of samples that are available + * @param Confidence probability of a Type I error + * @return Bucket data structure * @note Exceptions: none - * @note History: Thu Aug 3 12:58:10 1989, DSJ, Created. + * @note History: Thu Aug 3 12:58:10 1989, DSJ, Created. */ BUCKETS *GetBuckets(CLUSTERER* clusterer, DISTRIBUTION Distribution, @@ -1739,7 +1720,6 @@ BUCKETS *GetBuckets(CLUSTERER* clusterer, return Buckets; } // GetBuckets - /** * This routine creates a histogram data structure which can * be used by other routines to place samples into histogram @@ -1751,12 +1731,12 @@ BUCKETS *GetBuckets(CLUSTERER* clusterer, * order to make this possible, a mapping table is * computed which maps "normalized" samples into the * appropriate bucket. - * @param Distribution type of probability distribution to test for - * @param SampleCount number of samples that are available - * @param Confidence probability of a Type I error + * @param Distribution type of probability distribution to test for + * @param SampleCount number of samples that are available + * @param Confidence probability of a Type I error * @return Pointer to new histogram data structure - * @note Exceptions: None - * @note History: 6/4/89, DSJ, Created. + * @note Exceptions: None + * @note History: 6/4/89, DSJ, Created. */ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, uinT32 SampleCount, @@ -1840,7 +1820,6 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, return Buckets; } // MakeBuckets - /** * This routine computes the optimum number of histogram * buckets that should be used in a chi-squared goodness of @@ -1851,7 +1830,7 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, * values. 
The table is intended for a 0.05 level of * significance (alpha). This routine assumes that it is * equally valid for other alpha's, which may not be true. - * @param SampleCount number of samples to be tested + * @param SampleCount number of samples to be tested * @return Optimum number of histogram buckets * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -1874,7 +1853,6 @@ uinT16 OptimumNumberOfBuckets(uinT32 SampleCount) { return kBucketsTable[Last]; } // OptimumNumberOfBuckets - /** * This routine computes the chi-squared value which will * leave a cumulative probability of Alpha in the right tail @@ -1887,8 +1865,8 @@ uinT16 OptimumNumberOfBuckets(uinT32 SampleCount) { * chi-squared value. Therefore, once a particular chi-squared * value is computed, it is stored in the list and never * needs to be computed again. - * @param DegreesOfFreedom determines shape of distribution - * @param Alpha probability of right tail + * @param DegreesOfFreedom determines shape of distribution + * @param Alpha probability of right tail * @return Desired chi-squared value * @note Exceptions: none * @note History: 6/5/89, DSJ, Created. @@ -1932,19 +1910,19 @@ ComputeChiSquared (uinT16 DegreesOfFreedom, FLOAT64 Alpha) } // ComputeChiSquared - /** * This routine computes the probability density function * of a discrete normal distribution defined by the global * variables kNormalMean, kNormalVariance, and kNormalMagnitude. * Normal magnitude could, of course, be computed in terms of * the normal variance but it is precomputed for efficiency. - * @param x number to compute the normal probability density for + * @param x number to compute the normal probability density for * @note Globals: - * kNormalMean mean of a discrete normal distribution - * kNormalVariance variance of a discrete normal distribution - * kNormalMagnitude magnitude of a discrete normal distribution - * @return The value of the normal distribution at x. 
+ * kNormalMean mean of a discrete normal distribution + * kNormalVariance variance of a discrete normal distribution + * kNormalMagnitude magnitude of a discrete normal + *distribution + * @return The value of the normal distribution at x. * @note Exceptions: None * @note History: 6/4/89, DSJ, Created. */ @@ -1955,12 +1933,11 @@ FLOAT64 NormalDensity(inT32 x) { return kNormalMagnitude * exp(-0.5 * Distance * Distance / kNormalVariance); } // NormalDensity - /** * This routine computes the probability density function * of a uniform distribution at the specified point. The * range of the distribution is from 0 to BUCKETTABLESIZE. - * @param x number to compute the uniform probability density for + * @param x number to compute the uniform probability density for * @return The value of the uniform distribution at x. * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -1974,13 +1951,12 @@ FLOAT64 UniformDensity(inT32 x) { return (FLOAT64) 0.0; } // UniformDensity - /** * This routine computes a trapezoidal approximation to the * integral of a function over a small delta in x. - * @param f1 value of function at x1 - * @param f2 value of function at x2 - * @param Dx x2 - x1 (should always be positive) + * @param f1 value of function at x1 + * @param f2 value of function at x2 + * @param Dx x2 - x1 (should always be positive) * @return Approximation of the integral of the function from x1 to x2. * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -1989,7 +1965,6 @@ FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) { return (f1 + f2) * Dx / 2.0; } // Integral - /** * This routine counts the number of cluster samples which * fall within the various histogram buckets in Buckets. Only @@ -2002,12 +1977,12 @@ FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) { * range and the StdDev is 1/2 the range. A dimension with * zero standard deviation cannot be statistically analyzed. * In this case, a pseudo-analysis is used. 
- * @param Buckets histogram buckets to count samples - * @param Cluster cluster whose samples are being analyzed - * @param Dim dimension of samples which is being analyzed - * @param ParamDesc description of the dimension - * @param Mean "mean" of the distribution - * @param StdDev "standard deviation" of the distribution + * @param Buckets histogram buckets to count samples + * @param Cluster cluster whose samples are being analyzed + * @param Dim dimension of samples which is being analyzed + * @param ParamDesc description of the dimension + * @param Mean "mean" of the distribution + * @param StdDev "standard deviation" of the distribution * @return None (the Buckets data structure is filled in) * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -2071,16 +2046,15 @@ void FillBuckets(BUCKETS *Buckets, } } // FillBuckets - /** * This routine determines which bucket x falls into in the * discrete normal distribution defined by kNormalMean * and kNormalStdDev. x values which exceed the range of * the discrete distribution are clipped. - * @param ParamDesc used to identify circular dimensions - * @param x value to be normalized - * @param Mean mean of normal distribution - * @param StdDev standard deviation of normal distribution + * @param ParamDesc used to identify circular dimensions + * @param x value to be normalized + * @param Mean mean of normal distribution + * @param StdDev standard deviation of normal distribution * @return Bucket number into which x falls * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -2107,16 +2081,15 @@ uinT16 NormalBucket(PARAM_DESC *ParamDesc, return (uinT16) floor((FLOAT64) X); } // NormalBucket - /** * This routine determines which bucket x falls into in the * discrete uniform distribution defined by * BUCKETTABLESIZE. x values which exceed the range of * the discrete distribution are clipped. 
- * @param ParamDesc used to identify circular dimensions - * @param x value to be normalized - * @param Mean center of range of uniform distribution - * @param StdDev 1/2 the range of the uniform distribution + * @param ParamDesc used to identify circular dimensions + * @param x value to be normalized + * @param Mean center of range of uniform distribution + * @param StdDev 1/2 the range of the uniform distribution * @return Bucket number into which x falls * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -2143,7 +2116,6 @@ uinT16 UniformBucket(PARAM_DESC *ParamDesc, return (uinT16) floor((FLOAT64) X); } // UniformBucket - /** * This routine performs a chi-square goodness of fit test * on the histogram data in the Buckets data structure. TRUE @@ -2151,7 +2123,7 @@ uinT16 UniformBucket(PARAM_DESC *ParamDesc, * distribution which was specified when the Buckets * structure was originally created. Otherwise FALSE is * returned. - * @param Buckets histogram data to perform chi-square test on + * @param Buckets histogram data to perform chi-square test on * @return TRUE if samples match distribution, FALSE otherwise * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -2176,11 +2148,10 @@ BOOL8 DistributionOK(BUCKETS *Buckets) { return TRUE; } // DistributionOK - /** * This routine frees the memory used by the statistics * data structure. - * @param Statistics pointer to data structure to be freed + * @param Statistics pointer to data structure to be freed * @return None * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -2192,7 +2163,6 @@ void FreeStatistics(STATISTICS *Statistics) { memfree(Statistics); } // FreeStatistics - /** * This routine properly frees the memory used by a BUCKETS. * @@ -2204,13 +2174,12 @@ void FreeBuckets(BUCKETS *buckets) { Efree(buckets); } // FreeBuckets - /** * This routine frees the memory consumed by the specified * cluster and all of its subclusters. 
This is done by * recursive calls to FreeCluster(). * - * @param Cluster pointer to cluster to be freed + * @param Cluster pointer to cluster to be freed * * @return None * @@ -2225,7 +2194,6 @@ void FreeCluster(CLUSTER *Cluster) { } } // FreeCluster - /** * This routine computes the degrees of freedom that should * be used in a chi-squared test with the specified number of @@ -2234,8 +2202,8 @@ void FreeCluster(CLUSTER *Cluster) { * computed more easily. This will cause the value of * chi-squared to be higher than the optimum value, resulting * in the chi-square test being more lenient than optimum. - * @param Distribution distribution being tested for - * @param HistogramBuckets number of buckets in chi-square test + * @param Distribution distribution being tested for + * @param HistogramBuckets number of buckets in chi-square test * @return The number of degrees of freedom for a chi-square test * @note Exceptions: none * @note History: Thu Aug 3 14:04:18 1989, DSJ, Created. @@ -2252,7 +2220,6 @@ uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets) { } // DegreesOfFreedom - /** * This routine is used to search a list of histogram data * structures to find one with the specified number of @@ -2272,7 +2239,6 @@ int NumBucketsMatch(void *arg1, // BUCKETS *Histogram, } // NumBucketsMatch - /** * This routine is used to search a list for a list node * whose contents match Key. It is called by the list @@ -2287,13 +2253,12 @@ int ListEntryMatch(void *arg1, //ListNode } // ListEntryMatch - /** * This routine multiplies each ExpectedCount histogram entry * by NewSampleCount/OldSampleCount so that the histogram * is now adjusted to the new sample count. 
- * @param Buckets histogram data structure to adjust - * @param NewSampleCount new sample count to adjust to + * @param Buckets histogram data structure to adjust + * @param NewSampleCount new sample count to adjust to * @return none * @note Exceptions: none * @note History: Thu Aug 3 14:31:14 1989, DSJ, Created. @@ -2313,11 +2278,10 @@ void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount) { } // AdjustBuckets - /** * This routine sets the bucket counts in the specified histogram * to zero. - * @param Buckets histogram data structure to init + * @param Buckets histogram data structure to init * @return none * @note Exceptions: none * @note History: Thu Aug 3 14:31:14 1989, DSJ, Created. @@ -2331,7 +2295,6 @@ void InitBuckets(BUCKETS *Buckets) { } // InitBuckets - /** * This routine is used to search a list of structures which * hold pre-computed chi-squared values for a chi-squared @@ -2355,14 +2318,13 @@ int AlphaMatch(void *arg1, //CHISTRUCT *ChiStruct } // AlphaMatch - /** * This routine allocates a new data structure which is used * to hold a chi-squared value along with its associated * number of degrees of freedom and alpha value. * - * @param DegreesOfFreedom degrees of freedom for new chi value - * @param Alpha confidence level for new chi value + * @param DegreesOfFreedom degrees of freedom for new chi value + * @param Alpha confidence level for new chi value * @return none * @note Exceptions: none * @note History: Fri Aug 4 11:04:59 1989, DSJ, Created. @@ -2377,7 +2339,6 @@ CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha) { } // NewChiStruct - /** * This routine attempts to find an x value at which Function * goes to zero (i.e. a root of the function ). It will only @@ -2385,10 +2346,10 @@ CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha) { * are no extrema between the solution and the InitialGuess. * The algorithms used are extremely primitive. 
* - * @param Function function whose zero is to be found - * @param FunctionParams arbitrary data to pass to function - * @param InitialGuess point to start solution search at - * @param Accuracy maximum allowed error + * @param Function function whose zero is to be found + * @param FunctionParams arbitrary data to pass to function + * @param InitialGuess point to start solution search at + * @param Accuracy maximum allowed error * @return Solution of function ( x for which f(x) = 0 ). * @note Exceptions: none * @note History: Fri Aug 4 11:08:59 1989, DSJ, Created. @@ -2440,7 +2401,6 @@ void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy) } // Solve - /** * This routine computes the area under a chi density curve * from 0 to x, minus the desired area under the curve. The @@ -2455,8 +2415,8 @@ void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy) * integrating the chi density curve in parts to obtain * a series that can be used to compute the area under the * curve. - * @param ChiParams contains degrees of freedom and alpha - * @param x value of chi-squared to evaluate + * @param ChiParams contains degrees of freedom and alpha + * @param x value of chi-squared to evaluate * @return Error between actual and desired area under the chi curve. * @note Exceptions: none * @note History: Fri Aug 4 12:48:41 1989, DSJ, Created. @@ -2480,7 +2440,6 @@ FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) { } // ChiArea - /** * This routine looks at all samples in the specified cluster. * It computes a running estimate of the percentage of the @@ -2498,10 +2457,10 @@ FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) { * contained in the same cluster, then the cluster should be * split. 
* - * @param Clusterer data structure holding cluster tree - * @param Cluster cluster containing samples to be tested - * @param MaxIllegal max percentage of samples allowed to have - * more than 1 feature in the cluster + * @param Clusterer data structure holding cluster tree + * @param Cluster cluster containing samples to be tested + * @param MaxIllegal max percentage of samples allowed to have + * more than 1 feature in the cluster * @return TRUE if the cluster should be split, FALSE otherwise. * @note Exceptions: none * @note History: Wed Aug 30 11:13:05 1989, DSJ, Created. @@ -2562,7 +2521,7 @@ CLUSTER * Cluster, FLOAT32 MaxIllegal) } // MultipleCharSamples /** - * Compute the inverse of a matrix using LU decomposition with partial pivoting. + * Compute the inverse of a matrix using LU decomposition with partial pivoting. * The return value is the sum of norms of the off-diagonal terms of the * product of a and inv. (A measure of the error.) */ diff --git a/classify/clusttool.cpp b/classify/clusttool.cpp index d86c3a2407..02e619d273 100644 --- a/classify/clusttool.cpp +++ b/classify/clusttool.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: clustertool.c - ** Purpose: Misc. tools for use with the clustering routines - ** Author: Dan Johnson - ** History: 6/6/89, DSJ, Created. + ** Filename: clustertool.c + ** Purpose: Misc. tools for use with the clustering routines + ** Author: Dan Johnson + ** History: 6/6/89, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -26,9 +26,10 @@ #include //---------------Global Data Definitions and Declarations-------------------- -#define TOKENSIZE 80 //< max size of tokens read from an input file -#define MAXSAMPLESIZE 65535 //< max num of dimensions in feature space -//#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block size) +#define TOKENSIZE 80 //< max size of tokens read from an input file +#define MAXSAMPLESIZE 65535 //< max num of dimensions in feature space +//#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block +// size) /** * This routine reads a single integer from the specified @@ -37,7 +38,7 @@ * @param File open text file to read sample size from * @return Sample size * @note Globals: None - * @note Exceptions: ILLEGALSAMPLESIZE illegal format or range + * @note Exceptions: ILLEGALSAMPLESIZE illegal format or range * @note History: 6/6/89, DSJ, Created. */ uinT16 ReadSampleSize(FILE *File) { @@ -293,7 +294,7 @@ FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) { if (NumFloatsRead != 1) { if ((NumFloatsRead == EOF) && (i == 0)) { if (needs_free) { - Efree(Buffer); + Efree(Buffer); } return NULL; } else { @@ -315,8 +316,7 @@ FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) { * @note Exceptions: None * @note History: 6/6/89, DSJ, Created. */ -void -WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) { +void WriteParamDesc(FILE *File, uinT16 N, const PARAM_DESC ParamDesc[]) { int i; for (i = 0; i < N; i++) { @@ -446,15 +446,10 @@ void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { * @note History: 6/12/89, DSJ, Created. 
*/ -void WriteProtoList( - FILE *File, - uinT16 N, - PARAM_DESC ParamDesc[], - LIST ProtoList, - BOOL8 WriteSigProtos, - BOOL8 WriteInsigProtos) -{ - PROTOTYPE *Proto; +void WriteProtoList(FILE *File, uinT16 N, PARAM_DESC ParamDesc[], + LIST ProtoList, BOOL8 WriteSigProtos, + BOOL8 WriteInsigProtos) { + PROTOTYPE *Proto; /* write file header */ fprintf(File,"%0d\n",N); @@ -464,8 +459,8 @@ void WriteProtoList( iterate(ProtoList) { Proto = (PROTOTYPE *) first_node ( ProtoList ); - if (( Proto->Significant && WriteSigProtos ) || - ( ! Proto->Significant && WriteInsigProtos ) ) - WritePrototype( File, N, Proto ); + if ((Proto->Significant && WriteSigProtos) || + (!Proto->Significant && WriteInsigProtos)) + WritePrototype(File, N, Proto); } } diff --git a/classify/clusttool.h b/classify/clusttool.h index e82fa1ef48..a6fe38ea48 100644 --- a/classify/clusttool.h +++ b/classify/clusttool.h @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: clusttool.h - ** Purpose: Definition of clustering utility tools - ** Author: Dan Johnson - ** History: 6/6/89, DSJ, Created. + ** Filename: clusttool.h + ** Purpose: Definition of clustering utility tools + ** Author: Dan Johnson + ** History: 6/6/89, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -36,7 +36,7 @@ PROTOSTYLE ReadProtoStyle(FILE *File); FLOAT32 *ReadNFloats (FILE * File, uinT16 N, FLOAT32 Buffer[]); -void WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]); +void WriteParamDesc(FILE *File, uinT16 N, const PARAM_DESC ParamDesc[]); void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto); @@ -44,13 +44,9 @@ void WriteNFloats (FILE * File, uinT16 N, FLOAT32 Array[]); void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle); -void WriteProtoList( - FILE *File, - uinT16 N, - PARAM_DESC ParamDesc[], - LIST ProtoList, - BOOL8 WriteSigProtos, - BOOL8 WriteInsigProtos); +void WriteProtoList(FILE *File, uinT16 N, PARAM_DESC ParamDesc[], + LIST ProtoList, BOOL8 WriteSigProtos, + BOOL8 WriteInsigProtos); //--------------Global Data Definitions and Declarations--------------------- // define errors that can be trapped diff --git a/classify/cutoffs.cpp b/classify/cutoffs.cpp index 4f6417149a..ffb8692ef1 100644 --- a/classify/cutoffs.cpp +++ b/classify/cutoffs.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: cutoffs.c - ** Purpose: Routines to manipulate an array of class cutoffs. - ** Author: Dan Johnson - ** History: Wed Feb 20 09:28:51 1991, DSJ, Created. + ** Filename: cutoffs.c + ** Purpose: Routines to manipulate an array of class cutoffs. + ** Author: Dan Johnson + ** History: Wed Feb 20 09:28:51 1991, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at diff --git a/classify/featdefs.cpp b/classify/featdefs.cpp index ad7b799675..dd31f91d86 100644 --- a/classify/featdefs.cpp +++ b/classify/featdefs.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: featdefs.c - ** Purpose: Definitions of currently defined feature types. - ** Author: Dan Johnson - ** History: Mon May 21 10:26:21 1990, DSJ, Created. + ** Filename: featdefs.c + ** Purpose: Definitions of currently defined feature types. + ** Author: Dan Johnson + ** History: Mon May 21 10:26:21 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -289,13 +289,13 @@ CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, * the feature type for the feature with the specified short * name. Trap an error if the specified name is not found. * - * Globals: + * Globals: * - none * * @param FeatureDefs definitions of feature types/extractors * @param ShortName short name of a feature type * @return Feature type which corresponds to ShortName. - * @note Exceptions: + * @note Exceptions: * - ILLEGAL_SHORT_NAME * @note History: Wed May 23 15:36:05 1990, DSJ, Created. */ diff --git a/classify/fpoint.cpp b/classify/fpoint.cpp index 854bea7b7e..ff5b7b7cf7 100644 --- a/classify/fpoint.cpp +++ b/classify/fpoint.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: fpoint.c - ** Purpose: Abstract data type for a 2D point (floating point coords) - ** Author: Dan Johnson - ** History: Thu Apr 12 10:44:15 1990, DSJ, Created. 
+ ** Filename: fpoint.c + ** Purpose: Abstract data type for a 2D point (floating point coords) + ** Author: Dan Johnson + ** History: Thu Apr 12 10:44:15 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -58,5 +58,4 @@ FLOAT32 NormalizedAngleFrom(FPOINT *Point1, if (Angle < 0.0 || Angle >= FullScale) Angle = 0.0; return (Angle); - } diff --git a/classify/intmatcher.cpp b/classify/intmatcher.cpp index 8fc135ea45..ff999608f2 100644 --- a/classify/intmatcher.cpp +++ b/classify/intmatcher.cpp @@ -295,7 +295,8 @@ class ClassPruner { HeapSort(num_classes_, sort_key_, sort_index_); } - /** Prints debug info on the class pruner matches for the pruned classes only. */ + /** Prints debug info on the class pruner matches for the pruned classes only. + */ void DebugMatch(const Classify& classify, const INT_TEMPLATES_STRUCT* int_templates, const INT_FEATURE_STRUCT* features) const { @@ -370,8 +371,9 @@ class ClassPruner { private: /** Array[rounded_classes_] of initial counts for each class. */ int *class_count_; - /// Array[rounded_classes_] of modified counts for each class after normalizing - /// for expected number of features, disabled classes, fragments, and xheights. + /// Array[rounded_classes_] of modified counts for each class after + /// normalizing for expected number of features, disabled classes, fragments, + /// and xheights. int *norm_count_; /** Array[rounded_classes_ +1] of pruned counts that gets sorted */ int *sort_key_; @@ -402,8 +404,9 @@ class ClassPruner { * normalization process (by CLASS_INDEX) * @param expected_num_features Array of expected number of features * for each class (by CLASS_INDEX) - * @param results Sorted Array of pruned classes. 
Must be an array - * of size at least int_templates->NumClasses. + * @param results Sorted Array of pruned classes. Must be an + * array of size at least + * int_templates->NumClasses. * @param keep_this */ int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, @@ -606,7 +609,6 @@ int IntegerMatcher::FindGoodProtos( return NumGoodProtos; } - /** * FindBadFeatures finds all features with maximum feature-evidence < * AdaptFeatureThresh. The list is ordered by increasing feature number. @@ -701,7 +703,6 @@ void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) { evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1); } - /*---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------*/ @@ -717,8 +718,6 @@ void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) { class_template->NumConfigs * sizeof(feature_evidence_[0])); } - - /** * Print debugging information for Configuations * @return none @@ -742,7 +741,6 @@ void IMDebugConfiguration(int FeatureNum, cprintf ("\n"); } - /** * Print debugging information for Configuations * @return none @@ -795,10 +793,10 @@ int IntegerMatcher::UpdateTablesForFeature( uinT32 XFeatureAddress; uinT32 YFeatureAddress; uinT32 ThetaFeatureAddress; - uinT8 *UINT8Pointer; + uinT8* UINT8Pointer; int ProtoIndex; uinT8 Temp; - int *IntPointer; + int* IntPointer; int ConfigNum; inT32 M3; inT32 A3; @@ -916,7 +914,6 @@ int IntegerMatcher::UpdateTablesForFeature( return SumOverConfigs; } - /** * Print debugging information for Configuations * @return none @@ -1165,8 +1162,6 @@ void ScratchEvidence::UpdateSumOfProtoEvidences( } } - - /** * Normalize Sum of Proto and Feature Evidence by dividing by the sum of * the Feature Lengths and the Proto Lengths for each configuration. 
@@ -1180,7 +1175,6 @@ void ScratchEvidence::NormalizeSums( } } - /** * Find the best match for the current class and update the Result * with the configuration and match rating. diff --git a/classify/intmatcher.h b/classify/intmatcher.h index 46dbfc5a8d..df678d75ed 100644 --- a/classify/intmatcher.h +++ b/classify/intmatcher.h @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: intmatcher.h - ** Purpose: Interface to high level generic classifier routines. - ** Author: Robert Moss - ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created. + ** Filename: intmatcher.h + ** Purpose: Interface to high level generic classifier routines. + ** Author: Robert Moss + ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at diff --git a/classify/intproto.cpp b/classify/intproto.cpp index 4c2f0d9536..3bbb2777a0 100644 --- a/classify/intproto.cpp +++ b/classify/intproto.cpp @@ -326,10 +326,8 @@ int AddIntProto(INT_CLASS Class) { Word < Proto->Configs + WERDS_PER_CONFIG_VEC; *Word++ = 0); return (Index); - } - /** * This routine adds Proto to the class pruning tables * for the specified class in Templates. 
@@ -372,7 +370,6 @@ void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, } } /* AddProtoToClassPruner */ - /** * This routine updates the proto pruner lookup tables * for Class to include a new proto identified by ProtoId @@ -432,7 +429,6 @@ void AddProtoToProtoPruner(PROTO Proto, int ProtoId, FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug); } /* AddProtoToProtoPruner */ - /** * Returns a quantized bucket for the given param shifted by offset, * notionally (param + offset) * num_buckets, but clipped and casted to the @@ -550,7 +546,6 @@ void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { P->A, P->B, P->C, Class->ProtoLengths[ProtoId]); } /* ConvertProto */ - /** * This routine converts from the old floating point format * to the new integer format. @@ -627,7 +622,7 @@ INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos, * @note Exceptions: none * @note History: Thu Mar 21 14:45:04 1991, DSJ, Created. */ -void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) { +void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, FLOAT32 Evidence) { ScrollView::Color color = GetMatchColorFor(Evidence); RenderIntFeature(IntMatchWindow, Feature, color); if (FeatureDisplayWindow) { @@ -635,7 +630,6 @@ void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) { } } /* DisplayIntFeature */ - /** * This routine renders the specified proto into a * global display list. @@ -720,7 +714,6 @@ void free_int_class(INT_CLASS int_class) { Efree(int_class); } - /** * This routine allocates a new set of integer templates * initialized to hold 0 classes. @@ -1218,7 +1211,6 @@ FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) { } /* BucketStart */ - /** * This routine returns the parameter value which * corresponds to the end of the specified bucket. 
@@ -1236,7 +1228,6 @@ FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) { return (((FLOAT32) (Bucket + 1) / NumBuckets) - Offset); } /* BucketEnd */ - /** * This routine fills in the section of a class pruner * corresponding to a single x value for a single proto of @@ -1284,7 +1275,6 @@ void DoFill(FILL_SPEC *FillSpec, } } /* DoFill */ - /** * Return TRUE if the specified table filler is done, i.e. * if it has no more lines to fill. @@ -1306,7 +1296,6 @@ BOOL8 FillerDone(TABLE_FILLER *Filler) { } /* FillerDone */ - /** * This routine sets Bit in each bit vector whose * bucket lies within the range Center +- Spread. The fill @@ -1349,7 +1338,6 @@ void FillPPCircularBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], } /* FillPPCircularBits */ - /** * This routine sets Bit in each bit vector whose * bucket lies within the range Center +- Spread. The fill @@ -1516,7 +1504,6 @@ void GetCPPadsForLevel(int Level, } /* GetCPPadsForLevel */ - /** * @param Evidence evidence value to return color for * @return Color which corresponds to specified Evidence value. @@ -1538,7 +1525,6 @@ ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) { return ScrollView::BLUE; } /* GetMatchColorFor */ - /** * This routine returns (in Fill) the specification of * the next line to be filled from Filler. 
FillerDone() should @@ -1589,7 +1575,6 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) { } /* GetNextFill */ - /** * This routine computes a data structure (Filler) * which can be used to fill in a rectangle surrounding @@ -1787,7 +1772,6 @@ void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, window->DrawTo(X + Dx, Y + Dy); } /* RenderIntFeature */ - /** * This routine extracts the parameters of the specified * proto from the class description and adds a rendering of diff --git a/classify/kdtree.cpp b/classify/kdtree.cpp index 61a94f66cc..6ba7086d23 100644 --- a/classify/kdtree.cpp +++ b/classify/kdtree.cpp @@ -70,11 +70,11 @@ class MinK { const Element* elements() { return elements_; } private: - const Key max_key_; //< the maximum possible Key - Element* elements_; //< unsorted array of elements + const Key max_key_; //< the maximum possible Key + Element *elements_; //< unsorted array of elements int elements_count_; //< the number of results collected so far - int k_; //< the number of results we want from the search - int max_index_; //< the index of the result with the largest key + int k_; //< the number of results we want from the search + int max_index_; //< the index of the result with the largest key }; template @@ -117,7 +117,8 @@ bool MinK::insert(Key key, Value value) { //----------------------------------------------------------------------------- -/** Helper class for searching for the k closest points to query_point in tree. */ +/** Helper class for searching for the k closest points to query_point in tree. + */ class KDTreeSearch { public: KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest); @@ -241,14 +242,13 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) { *PtrToNode = MakeKDNode(Tree, Key, (void *) Data, Level); } /* KDStore */ - /** - * This routine deletes a node from Tree. The node to be - * deleted is specified by the Key for the node and the Data - * contents of the node. 
These two pointers must be identical - * to the pointers that were used for the node when it was - * originally stored in the tree. A node will be deleted from - * the tree only if its key and data pointers are identical + * This routine deletes a node from Tree. The node to be + * deleted is specified by the Key for the node and the Data + * contents of the node. These two pointers must be identical + * to the pointers that were used for the node when it was + * originally stored in the tree. A node will be deleted from + * the tree only if its key and data pointers are identical * to Key and Data respectively. The tree is re-formed by removing * the affected subtree and inserting all elements but the root. * @@ -298,7 +298,6 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) { } } /* KDDelete */ - /** * This routine searches the K-D tree specified by Tree and * finds the QuerySize nearest neighbors of Query. All neighbors @@ -442,7 +441,7 @@ void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) { /*---------------------------------------------------------------------------*/ -/** +/** *Returns the Euclidean distance squared between p1 and p2 for all essential * dimensions. * @param k keys are in k-space @@ -541,7 +540,6 @@ void Walk(KDTREE *tree, void_proc action, void *context, Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level)); } - /** Given a subtree nodes, insert all of its elements into tree. */ void InsertNodes(KDTREE *tree, KDNODE *nodes) { if (nodes == NULL) diff --git a/classify/mf.cpp b/classify/mf.cpp index d0c59487e6..37cd2eca0a 100644 --- a/classify/mf.cpp +++ b/classify/mf.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: mf.c - ** Purpose: Micro-feature interface to flexible feature extractor. - ** Author: Dan Johnson - ** History: Thu May 24 09:08:38 1990, DSJ, Created. 
+ ** Filename: mf.c + ** Purpose: Micro-feature interface to flexible feature extractor. + ** Author: Dan Johnson + ** History: Thu May 24 09:08:38 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -36,7 +36,7 @@ * Call the old micro-feature extractor and then copy * the features into the new format. Then deallocate the * old micro-features. - * @param Blob blob to extract micro-features from + * @param Blob blob to extract micro-features from * @param cn_denorm control parameter to feature extractor. * @return Micro-features for Blob. * @note Exceptions: none diff --git a/classify/mfdefs.cpp b/classify/mfdefs.cpp index abe8d0c71a..0f225e8b08 100644 --- a/classify/mfdefs.cpp +++ b/classify/mfdefs.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: mfdefs.c - ** Purpose: Basic routines for manipulating micro-features - ** Author: Dan Johnson - ** History: Mon Jan 22 08:48:58 1990, DSJ, Created. + ** Filename: mfdefs.c + ** Purpose: Basic routines for manipulating micro-features + ** Author: Dan Johnson + ** History: Mon Jan 22 08:48:58 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -32,7 +32,7 @@ * @return New MICROFEATURE * @note History: 7/27/89, DSJ, Created. 
*/ -MICROFEATURE NewMicroFeature() { +MICROFEATURE NewMicroFeature() { return ((MICROFEATURE) Emalloc (sizeof (MFBLOCK))); } /* NewMicroFeature */ @@ -41,10 +41,10 @@ MICROFEATURE NewMicroFeature() { /** * This routine deallocates all of the memory consumed by * a list of micro-features. - * @param MicroFeatures list of micro-features to be freed + * @param MicroFeatures list of micro-features to be freed * @return none * @note History: 7/27/89, DSJ, Created. */ -void FreeMicroFeatures(MICROFEATURES MicroFeatures) { +void FreeMicroFeatures(MICROFEATURES MicroFeatures) { destroy_nodes(MicroFeatures, Efree); } /* FreeMicroFeatures */ diff --git a/classify/mfoutline.cpp b/classify/mfoutline.cpp index 511c34d41f..59593a8523 100644 --- a/classify/mfoutline.cpp +++ b/classify/mfoutline.cpp @@ -35,7 +35,8 @@ ----------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ -/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). */ +/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). 
+ */ LIST ConvertBlob(TBLOB *blob) { LIST outlines = NIL_LIST; return (blob == NULL) @@ -344,7 +345,6 @@ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { } /* ChangeDirection */ - /** * This routine normalizes each point in Outline by * translating it to the specified center and scaling it @@ -378,7 +378,6 @@ void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { } /* CharNormalizeOutline */ - /** * This routine computes the slope from Start to Finish and * and then computes the approximate direction of the line diff --git a/classify/mfx.cpp b/classify/mfx.cpp index 3da4fb3d0f..6fd8ed5da5 100644 --- a/classify/mfx.cpp +++ b/classify/mfx.cpp @@ -128,7 +128,6 @@ FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) { return (Orientation); } /* ComputeOrientation */ - /** * Convert Outline to MicroFeatures * @param Outline outline to extract micro-features from @@ -164,7 +163,6 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, return (MicroFeatures); } /* ConvertToMicroFeatures */ - /** * This routine computes the feature parameters which describe * the micro-feature that starts and Start and ends at End. @@ -178,7 +176,7 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, * @return New micro-feature or NULL if the feature was rejected. * @note Globals: none * @note Exceptions: none - * @note History: + * @note History: * - 7/26/89, DSJ, Created. * - 11/17/89, DSJ, Added handling for Start and End same point. */ diff --git a/classify/mfx.h b/classify/mfx.h index 05ce29cee5..5ed006dcc7 100644 --- a/classify/mfx.h +++ b/classify/mfx.h @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: mfx.h - ** Purpose: Definition of micro-feature extraction routines - ** Author: Dan Johnson - ** History: 5/29/89, DSJ, Created. 
+ ** Filename: mfx.h + ** Purpose: Definition of micro-feature extraction routines + ** Author: Dan Johnson + ** History: 5/29/89, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at diff --git a/classify/normfeat.cpp b/classify/normfeat.cpp index a4ac672a11..f297b3b05d 100644 --- a/classify/normfeat.cpp +++ b/classify/normfeat.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: normfeat.c - ** Purpose: Definition of char normalization features. - ** Author: Dan Johnson - ** History: 12/14/90, DSJ, Created. + ** Filename: normfeat.c + ** Purpose: Definition of char normalization features. + ** Author: Dan Johnson + ** History: 12/14/90, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -33,7 +33,6 @@ FLOAT32 ActualOutlineLength(FEATURE Feature) { return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION); } - /** * Return the character normalization feature for a blob. * diff --git a/classify/normmatch.cpp b/classify/normmatch.cpp index 488cd1652f..b0e066a89d 100644 --- a/classify/normmatch.cpp +++ b/classify/normmatch.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: normmatch.c - ** Purpose: Simple matcher based on character normalization features. - ** Author: Dan Johnson - ** History: Wed Dec 19 16:18:06 1990, DSJ, Created. + ** Filename: normmatch.c + ** Purpose: Simple matcher based on character normalization features. 
+ ** Author: Dan Johnson + ** History: Wed Dec 19 16:18:06 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -197,10 +197,10 @@ double NormEvidenceOf(register double NormAdj) { /*---------------------------------------------------------------------------*/ /** * This routine dumps out detailed normalization match info. - * @param File open text file to dump match debug info to - * @param NumParams # of parameters in proto and feature - * @param Proto[] array of prototype parameters - * @param Feature[] array of feature parameters + * @param File open text file to dump match debug info to + * @param NumParams # of parameters in proto and feature + * @param Proto[] array of prototype parameters + * @param Feature[] array of feature parameters * Globals: none * @return none * @note Exceptions: none diff --git a/classify/ocrfeatures.cpp b/classify/ocrfeatures.cpp index 0895ed0886..7df8135048 100644 --- a/classify/ocrfeatures.cpp +++ b/classify/ocrfeatures.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: features.c - ** Purpose: Generic definition of a feature. - ** Author: Dan Johnson - ** History: Mon May 21 10:49:04 1990, DSJ, Created. + ** Filename: features.c + ** Purpose: Generic definition of a feature. + ** Author: Dan Johnson + ** History: Mon May 21 10:49:04 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -66,12 +66,11 @@ void FreeFeature(FEATURE Feature) { } /* FreeFeature */ - /** * Release the memory consumed by the specified feature * set. This routine also frees the memory consumed by the * features contained in the set. - * @param FeatureSet set of features to be freed + * @param FeatureSet set of features to be freed * @return none * @note History: Mon May 21 13:59:46 1990, DSJ, Created. */ @@ -85,11 +84,10 @@ void FreeFeatureSet(FEATURE_SET FeatureSet) { } } /* FreeFeatureSet */ - /** * Allocate and return a new feature of the specified * type. - * @param FeatureDesc description of feature to be created. + * @param FeatureDesc description of feature to be created. * @return New #FEATURE. * @note History: Mon May 21 14:06:42 1990, DSJ, Created. */ @@ -105,11 +103,10 @@ FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { } /* NewFeature */ - /** * Allocate and return a new feature set large enough to * hold the specified number of features. - * @param NumFeatures maximum # of features to be put in feature set + * @param NumFeatures maximum # of features to be put in feature set * @return New #FEATURE_SET. * @note History: Mon May 21 14:22:40 1990, DSJ, Created. */ @@ -124,7 +121,6 @@ FEATURE_SET NewFeatureSet(int NumFeatures) { } /* NewFeatureSet */ - /** * Create a new feature of the specified type and read in * the value of its parameters from File. The extra penalty @@ -135,10 +131,11 @@ FEATURE_SET NewFeatureSet(int NumFeatures) { * @param File open text file to read feature from * @param FeatureDesc specifies type of feature to read from File * @return New #FEATURE read from File. - * @note Exceptions: #ILLEGAL_FEATURE_PARAM if text file doesn't match expected format + * @note Exceptions: #ILLEGAL_FEATURE_PARAM if text file doesn't match expected + * format * @note History: Wed May 23 08:53:16 1990, DSJ, Created. 
*/ -FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { +FEATURE ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { FEATURE Feature; int i; @@ -153,7 +150,6 @@ FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { return (Feature); } /* ReadFeature */ - /** * Create a new feature set of the specified type and read in * the features from File. The correct text representation @@ -165,7 +161,7 @@ FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { * @return New feature set read from File. * @note History: Wed May 23 09:17:31 1990, DSJ, Created. */ -FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { +FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { FEATURE_SET FeatureSet; int NumFeatures; int i; @@ -180,7 +176,6 @@ FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { return (FeatureSet); } /* ReadFeatureSet */ - /** * Appends a textual representation of Feature to str. * This representation is simply a list of the N parameters @@ -203,7 +198,6 @@ void WriteFeature(FEATURE Feature, STRING* str) { *str += "\n"; } /* WriteFeature */ - /** * Write a textual representation of FeatureSet to File. * This representation is an integer specifying the number of @@ -224,7 +218,6 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) { } } /* WriteFeatureSet */ - /** * Write a textual representation of FeatureDesc to File * in the old format (i.e. the format used by the clusterer). @@ -240,7 +233,7 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) { * @return none * @note History: Fri May 25 15:27:18 1990, DSJ, Created. 
*/ -void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { +void WriteOldParamDesc(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { int i; fprintf (File, "%d\n", FeatureDesc->NumParams); diff --git a/classify/outfeat.cpp b/classify/outfeat.cpp index b1a4a9be90..76597f7c15 100644 --- a/classify/outfeat.cpp +++ b/classify/outfeat.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: outfeat.c - ** Purpose: Definition of outline-features. - ** Author: Dan Johnson - ** History: 11/13/90, DSJ, Created. + ** Filename: outfeat.c + ** Purpose: Definition of outline-features. + ** Author: Dan Johnson + ** History: 11/13/90, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -40,7 +40,7 @@ namespace tesseract { * @return Outline-features for Blob. * @note Globals: none * @note Exceptions: none - * @note History: + * @note History: * - 11/13/90, DSJ, Created. * - 05/24/91, DSJ, Updated for either char or baseline normalize. */ @@ -115,7 +115,7 @@ void AddOutlineFeatureToSet(FPOINT *Start, * @return none (results are returned in FeatureSet) * @note Globals: none * @note Exceptions: none - * @note History: + * @note History: * - 11/13/90, DSJ, Created. * - 5/24/91, DSJ, Added hidden edge capability. */ diff --git a/classify/picofeat.cpp b/classify/picofeat.cpp index 74beb18f35..a4a39263cf 100644 --- a/classify/picofeat.cpp +++ b/classify/picofeat.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: picofeat.c - ** Purpose: Definition of pico-features. - ** Author: Dan Johnson - ** History: 9/4/90, DSJ, Created. + ** Filename: picofeat.c + ** Purpose: Definition of pico-features. 
+ ** Author: Dan Johnson + ** History: 9/4/90, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -98,7 +98,7 @@ FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { * nearest whole number of pico-features. The pico-features * are spaced evenly over the entire segment. * Globals: - * - classify_pico_feature_length length of a single pico-feature + * - classify_pico_feature_length length of a single pico-feature * @param Start starting point of pico-feature * @param End ending point of pico-feature * @param FeatureSet set to add pico-feature to diff --git a/cutil/bitvec.h b/cutil/bitvec.h index f70d748b91..d2a364d261 100644 --- a/cutil/bitvec.h +++ b/cutil/bitvec.h @@ -30,29 +30,29 @@ typedef uinT32 *BIT_VECTOR; /*----------------------------------------------------------------------------- Public Function Prototypes -----------------------------------------------------------------------------*/ -#define zero_all_bits(array,length) \ -{\ - int index; /*temporary index*/\ -\ -for (index=0;index #include - #else #include #include @@ -25,23 +23,24 @@ #include "thresholder.h" #if ON_APPLE -#include #include +#include #endif /* Convenience macro to test the version of Leptonica. 
*/ #if defined(LIBLEPT_MAJOR_VERSION) && defined(LIBLEPT_MINOR_VERSION) -# define TESSERACT_LIBLEPT_PREREQ(maj, min) \ - ((LIBLEPT_MAJOR_VERSION) > (maj) || ((LIBLEPT_MAJOR_VERSION) == (maj) && (LIBLEPT_MINOR_VERSION) >= (min))) +#define TESSERACT_LIBLEPT_PREREQ(maj, min) \ + ((LIBLEPT_MAJOR_VERSION) > (maj) || \ + ((LIBLEPT_MAJOR_VERSION) == (maj) && (LIBLEPT_MINOR_VERSION) >= (min))) #else -# define TESSERACT_LIBLEPT_PREREQ(maj, min) 0 +#define TESSERACT_LIBLEPT_PREREQ(maj, min) 0 #endif -#if TESSERACT_LIBLEPT_PREREQ(1,73) -# define CALLOC LEPT_CALLOC -# define FREE LEPT_FREE +#if TESSERACT_LIBLEPT_PREREQ(1, 73) +#define CALLOC LEPT_CALLOC +#define FREE LEPT_FREE #endif #ifdef USE_OPENCL @@ -49,36 +48,28 @@ #include "opencl_device_selection.h" GPUEnv OpenclDevice::gpuEnv; - bool OpenclDevice::deviceIsSelected = false; ds_device OpenclDevice::selectedDevice; - int OpenclDevice::isInited = 0; static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC; static const l_uint32 lmask32[] = { - 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, - 0xf8000000, 0xfc000000, 0xfe000000, 0xff000000, - 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000, - 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, - 0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000, - 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00, - 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, - 0xfffffff8, 0xfffffffc, 0xfffffffe, 0xffffffff -}; + 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000, 0xfc000000, + 0xfe000000, 0xff000000, 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000, + 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, 0xffff8000, 0xffffc000, + 0xffffe000, 0xfffff000, 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00, + 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, 0xfffffff8, 0xfffffffc, + 0xfffffffe, 0xffffffff}; static const l_uint32 rmask32[] = { - 0x00000001, 0x00000003, 0x00000007, 0x0000000f, - 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, - 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, - 0x00001fff, 
0x00003fff, 0x00007fff, 0x0000ffff, - 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, - 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, - 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, - 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff -}; + 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, + 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, + 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff, + 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, + 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, + 0x7fffffff, 0xffffffff}; struct tiff_transform { int vflip; /* if non-zero, image needs a vertical fip */ @@ -99,7 +90,7 @@ static struct tiff_transform tiff_orientation_transforms[] = { {0, 0, -1} }; -static const l_int32 MAX_PAGES_IN_TIFF_FILE = 3000; +static const l_int32 MAX_PAGES_IN_TIFF_FILE = 3000; cl_mem pixsCLBuffer, pixdCLBuffer, pixdCLIntermediate; //Morph operations buffers cl_mem pixThBuffer; //output from thresholdtopix calculation @@ -109,7 +100,8 @@ KernelEnv rEnv; // substitute invalid characters in device name with _ void legalizeFileName( char *fileName) { //printf("fileName: %s\n", fileName); - const char* invalidChars = "/\?:*\"><| "; // space is valid but can cause headaches + const char *invalidChars = + "/\?:*\"><| "; // space is valid but can cause headaches // for each invalid char for (int i = 0; i < strlen(invalidChars); i++) { char invalidStr[4]; @@ -152,7 +144,6 @@ void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) { cl_command_queue_properties queueProperties = 0; gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus ); CHECK_OPENCL( clStatus, "populateGPUEnv::createCommandQueue"); - } int OpenclDevice::LoadOpencl() @@ -168,7 +159,6 @@ int OpenclDevice::LoadOpencl() fprintf(stderr, "[OD] Load opencl.dll failed!\n"); FreeLibrary( static_cast( OpenclDll ) ); return 
0; - } fprintf(stderr, "[OD] Load opencl.dll successful!\n"); #endif @@ -191,42 +181,37 @@ cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32 *hostbuffer, size_t nElem return membuffer; } -PIX* mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX* pixd, PIX* pixs, int elements, cl_mem_flags flags, bool memcopy = false, bool sync = true) -{ - PROCNAME("mapOutputCLBuffer"); - if (!pixd) - { - if (memcopy) - { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - else - { - if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs), pixGetDepth(pixs))) == NULL) - (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - } - l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer(rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0, - elements * sizeof(l_uint32), 0, NULL, NULL, NULL ); - - if (memcopy) - { - memcpy(pixGetData(pixd), pValues, elements * sizeof(l_uint32)); - } - else - { - pixSetData(pixd, pValues); +PIX *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX *pixd, PIX *pixs, + int elements, cl_mem_flags flags, bool memcopy = false, + bool sync = true) { + PROCNAME("mapOutputCLBuffer"); + if (!pixd) { + if (memcopy) { + if ((pixd = pixCreateTemplate(pixs)) == NULL) + (PIX *)ERROR_PTR("pixd not made", procName, NULL); + } else { + if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs), + pixGetDepth(pixs))) == NULL) + (PIX *)ERROR_PTR("pixd not made", procName, NULL); } + } + l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer( + rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0, + elements * sizeof(l_uint32), 0, NULL, NULL, NULL); + + if (memcopy) { + memcpy(pixGetData(pixd), pValues, elements * sizeof(l_uint32)); + } else { + pixSetData(pixd, pValues); + } - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,clbuffer,pValues,0,NULL,NULL); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, clbuffer, pValues, 0, NULL, NULL); - if (sync) - { - clFinish( rEnv.mpkCmdQueue ); - } + if (sync) { + 
clFinish(rEnv.mpkCmdQueue); + } - return pixd; + return pixd; } cl_mem allocateIntBuffer( KernelEnv rEnv, const l_uint32 *_pValues, size_t nElements, cl_int *pStatus , bool sync = false) @@ -261,7 +246,7 @@ void OpenclDevice::releaseMorphCLBuffers() clReleaseMemObject(pixdCLBuffer); if (pixThBuffer != NULL) clReleaseMemObject(pixThBuffer); - pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = NULL; + pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = NULL; } int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs) @@ -305,7 +290,6 @@ PERF_COUNT_SUB("LoadOpencl") #endif // sets up environment, compiles programs - InitOpenclRunEnv_DeviceSelection( 0 ); //PERF_COUNT_SUB("called InitOpenclRunEnv_DS") //PERF_COUNT_END @@ -418,17 +402,18 @@ int OpenclDevice::BinaryGenerated( const char * clFileName, FILE ** fhandle ) int status = 0; char *str = NULL; FILE *fd = NULL; - char fileName[256] = { 0 }, cl_name[128] = { 0 }; + char fileName[256] = {0}, cl_name[128] = {0}; char deviceName[1024]; - clStatus = clGetDeviceInfo( gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL ); - CHECK_OPENCL( clStatus, "clGetDeviceInfo" ); - str = (char*) strstr( clFileName, (char*) ".cl" ); - memcpy( cl_name, clFileName, str - clFileName ); + clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, + sizeof(deviceName), deviceName, NULL); + CHECK_OPENCL(clStatus, "clGetDeviceInfo"); + str = (char *)strstr(clFileName, (char *)".cl"); + memcpy(cl_name, clFileName, str - clFileName); cl_name[str - clFileName] = '\0'; - sprintf( fileName, "%s-%s.bin", cl_name, deviceName ); + sprintf(fileName, "%s-%s.bin", cl_name, deviceName); legalizeFileName(fileName); - fd = fopen( fileName, "rb" ); - status = ( fd != NULL ) ? 1 : 0; + fd = fopen(fileName, "rb"); + status = (fd != NULL) ? 
1 : 0; if ( fd != NULL ) { *fhandle = fd; @@ -848,7 +833,6 @@ PIX *pix; fclose(fp); PERF_COUNT_END return pix; - } TIFF * OpenclDevice::fopenTiffCl(FILE *fp, @@ -1049,8 +1033,8 @@ tiffCloseCallback(thandle_t handle) *mstream->poutdata = mstream->buffer; *mstream->poutsize = mstream->hw; } - FREE(mstream); /* never free the buffer! */ - return 0; + FREE(mstream); /* never free the buffer! */ + return 0; } @@ -1144,38 +1128,38 @@ OpenclDevice::pixReadMemTiffCl(const l_uint8 *data,size_t size,l_int32 n) l_int32 i, pagefound; PIX *pix; TIFF *tif; - //L_MEMSTREAM *memStream; - PROCNAME("pixReadMemTiffCl"); - - if (!data) - return (PIX *)ERROR_PTR("data pointer is NULL", procName, NULL); - - if ((tif = fopenTiffMemstream("", "r", (l_uint8 **)&data, &size)) == NULL) - return (PIX *)ERROR_PTR("tif not opened", procName, NULL); - - pagefound = FALSE; - pix = NULL; - for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) { - if (i == n) { - pagefound = TRUE; - if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) { - TIFFCleanup(tif); - return (PIX *)ERROR_PTR("pix not read", procName, NULL); - } - break; - } - if (TIFFReadDirectory(tif) == 0) - break; - } + // L_MEMSTREAM *memStream; + PROCNAME("pixReadMemTiffCl"); - if (pagefound == FALSE) { - L_WARNING("tiff page %d not found", procName, i); - TIFFCleanup(tif); - return NULL; - } + if (!data) + return (PIX *)ERROR_PTR("data pointer is NULL", procName, NULL); + + if ((tif = fopenTiffMemstream("", "r", (l_uint8 **)&data, &size)) == + NULL) + return (PIX *)ERROR_PTR("tif not opened", procName, NULL); + + pagefound = FALSE; + pix = NULL; + for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) { + if (i == n) { + pagefound = TRUE; + if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) { + TIFFCleanup(tif); + return (PIX *)ERROR_PTR("pix not read", procName, NULL); + } + break; + } + if (TIFFReadDirectory(tif) == 0) break; + } - TIFFCleanup(tif); - return pix; + if (pagefound == FALSE) { + L_WARNING("tiff page %d not found", procName, i); + 
TIFFCleanup(tif); + return NULL; + } + + TIFFCleanup(tif); + return pix; } PIX * @@ -1262,7 +1246,6 @@ void compare(l_uint32 *cpu, l_uint32 *gpu,int size) } } printf("\nit matches\n"); - } //OpenCL implementation of pixReadFromTiffStream. @@ -1285,7 +1268,6 @@ PIXCMAP *cmap; if (!tif) return (PIX *)ERROR_PTR("tif not defined", procName, NULL); - TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps); TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp); bpp = bps * spp; @@ -1308,16 +1290,15 @@ PIXCMAP *cmap; wpl = pixGetWpl(pix); bpl = 4 * wpl; - if (spp == 1) { if ((linebuf = (l_uint8 *)CALLOC(tiffbpl + 1, sizeof(l_uint8))) == NULL) return (PIX *)ERROR_PTR("calloc fail for linebuf", procName, NULL); for (i = 0 ; i < h ; i++) { if (TIFFReadScanline(tif, linebuf, i, 0) < 0) { - FREE(linebuf); - pixDestroy(&pix); - return (PIX *)ERROR_PTR("line read fail", procName, NULL); + FREE(linebuf); + pixDestroy(&pix); + return (PIX *)ERROR_PTR("line read fail", procName, NULL); } memcpy((char *)data, (char *)linebuf, tiffbpl); data += bpl; @@ -1325,30 +1306,29 @@ PIXCMAP *cmap; if (bps <= 8) pixEndianByteSwap(pix); else - pixEndianTwoByteSwap(pix); + pixEndianTwoByteSwap(pix); FREE(linebuf); - } - else { - if ((tiffdata = (l_uint32 *)CALLOC(w * h, sizeof(l_uint32))) == NULL) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("calloc fail for tiffdata", procName, NULL); - } - if (!TIFFReadRGBAImageOriented(tif, w, h, (uint32 *)tiffdata, - ORIENTATION_TOPLEFT, 0)) { - FREE(tiffdata); - pixDestroy(&pix); - return (PIX *)ERROR_PTR("failed to read tiffdata", procName, NULL); - } - line = pixGetData(pix); + } else { + if ((tiffdata = (l_uint32 *)CALLOC(w * h, sizeof(l_uint32))) == NULL) { + pixDestroy(&pix); + return (PIX *)ERROR_PTR("calloc fail for tiffdata", procName, NULL); + } + if (!TIFFReadRGBAImageOriented(tif, w, h, (uint32 *)tiffdata, + ORIENTATION_TOPLEFT, 0)) { + FREE(tiffdata); + pixDestroy(&pix); + return (PIX *)ERROR_PTR("failed to read tiffdata", procName, NULL); 
+ } + line = pixGetData(pix); - //Invoke the OpenCL kernel for pixReadFromTiff - l_uint32* output_gpu=pixReadFromTiffKernel(tiffdata,w,h,wpl,line); + // Invoke the OpenCL kernel for pixReadFromTiff + l_uint32 *output_gpu = pixReadFromTiffKernel(tiffdata, w, h, wpl, line); - pixSetData(pix, output_gpu); - // pix already has data allocated, it now points to output_gpu? - FREE(tiffdata); - FREE(line); - //FREE(output_gpu); + pixSetData(pix, output_gpu); + // pix already has data allocated, it now points to output_gpu? + FREE(tiffdata); + FREE(line); + // FREE(output_gpu); } if (getTiffStreamResolutionCl(tif, &xres, &yres) == 0) { @@ -1362,7 +1342,6 @@ PIXCMAP *cmap; pixSetInputFormat(pix, comptype); if (TIFFGetField(tif, TIFFTAG_COLORMAP, &redmap, &greenmap, &bluemap)) { - if ((cmap = pixcmapCreate(bps)) == NULL) { pixDestroy(&pix); return (PIX *)ERROR_PTR("cmap not made", procName, NULL); @@ -1372,22 +1351,19 @@ PIXCMAP *cmap; pixcmapAddColor(cmap, redmap[i] >> 8, greenmap[i] >> 8, bluemap[i] >> 8); pixSetColormap(pix, cmap); - } - else { - if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) { - - if (tiffcomp == COMPRESSION_CCITTFAX3 || - tiffcomp == COMPRESSION_CCITTFAX4 || - tiffcomp == COMPRESSION_CCITTRLE || - tiffcomp == COMPRESSION_CCITTRLEW) { - photometry = PHOTOMETRIC_MINISWHITE; - } - else - photometry = PHOTOMETRIC_MINISBLACK; - } - if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) || - (d == 8 && photometry == PHOTOMETRIC_MINISWHITE)) - pixInvert(pix, pix); + } else { + if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) { + if (tiffcomp == COMPRESSION_CCITTFAX3 || + tiffcomp == COMPRESSION_CCITTFAX4 || + tiffcomp == COMPRESSION_CCITTRLE || + tiffcomp == COMPRESSION_CCITTRLEW) { + photometry = PHOTOMETRIC_MINISWHITE; + } else + photometry = PHOTOMETRIC_MINISBLACK; + } + if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) || + (d == 8 && photometry == PHOTOMETRIC_MINISWHITE)) + pixInvert(pix, pix); } if (TIFFGetField(tif, 
TIFFTAG_ORIENTATION, &orientation)) { @@ -1635,95 +1611,50 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) if (xp > 31 || xn > 31) { - //Generic case. - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor", &status ); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(xp), - (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(xn), - (const void *)&xn); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(wpl), - (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(h), - (const void *)&h); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); - - if (yp > 0 || yn > 0) - { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; + // Generic case. 
+ rEnv.mpkKernel = + clCreateKernel(rEnv.mpkProgram, "morphoDilateHor", &status); + + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), (const void *)&xn); + status = + clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), (const void *)&wpl); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), (const void *)&h); + status = + clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); + + if (yp > 0 || yn > 0) { + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; } } else if (xp > 0 || xn > 0 ) { - //Specific Horizontal pass kernel for half width < 32 - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor_32word", &status ); - isEven = (xp != xn); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(xp), - (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(wpl), - (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(h), - (const void *)&h); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(isEven), - (const void *)&isEven); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); - - if (yp > 0 || yn > 0) - { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; + // Specific Horizontal pass kernel for half width < 32 + rEnv.mpkKernel = + clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_32word", &status); + isEven = (xp != xn); + + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); + 
status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); + status = + clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), (const void *)&wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), + (const void *)&isEven); + status = + clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); + + if (yp > 0 || yn > 0) { + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; } } @@ -1766,253 +1697,153 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) NULL); } - return status; } //Morphology Erode operation. Invokes the relevant OpenCL kernels -cl_int -pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) -{ - - l_int32 xp, yp, xn, yn; - SEL* sel; - size_t globalThreads[2]; - size_t localThreads[2]; - cl_mem pixtemp; - cl_int status; - int gsize; - char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC); - l_uint32 rwmask, lwmask; - char isEven; - - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - - selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); - selDestroy(&sel); - OpenclDevice::SetKernelEnv( &rEnv ); - - if (hsize == 5 && vsize == 5 && isAsymmetric) - { - //Specific kernel for 5x5 - status = pixErodeCL_55(wpl, h); - return status; - } - - lwmask = lmask32[31 - (xn & 31)]; - rwmask = rmask32[31 - (xp & 31)]; - - //global and local work dimensions for Horizontal pass - gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - //Horizontal Pass - if (xp > 31 || xn > 31 ) - { - //Generic case. 
- rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeHor", &status ); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(xp), - (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(xn), - (const void *)&xn); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(wpl), - (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(h), - (const void *)&h); - status = clSetKernelArg(rEnv.mpkKernel, - 6, - sizeof(isAsymmetric), - (const void *)&isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, - 7, - sizeof(rwmask), - (const void *)&rwmask); - status = clSetKernelArg(rEnv.mpkKernel, - 8, - sizeof(lwmask), - (const void *)&lwmask); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); - - if (yp > 0 || yn > 0) - { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } - } - else if (xp > 0 || xn > 0) - { - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeHor_32word", &status ); - isEven = (xp != xn); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(xp), - (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(wpl), - (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(h), - (const void *)&h); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(isAsymmetric), - (const void *)&isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, - 6, - sizeof(rwmask), - (const void *)&rwmask); - status = clSetKernelArg(rEnv.mpkKernel, - 7, - sizeof(lwmask), - (const void *)&lwmask); - status = 
clSetKernelArg(rEnv.mpkKernel, - 8, - sizeof(isEven), - (const void *)&isEven); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); +cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { + l_int32 xp, yp, xn, yn; + SEL *sel; + size_t globalThreads[2]; + size_t localThreads[2]; + cl_mem pixtemp; + cl_int status; + int gsize; + char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC); + l_uint32 rwmask, lwmask; + char isEven; + + sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); + + selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); + selDestroy(&sel); + OpenclDevice::SetKernelEnv(&rEnv); + + if (hsize == 5 && vsize == 5 && isAsymmetric) { + // Specific kernel for 5x5 + status = pixErodeCL_55(wpl, h); + return status; + } - if (yp > 0 || yn > 0) - { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } + lwmask = lmask32[31 - (xn & 31)]; + rwmask = rmask32[31 - (xp & 31)]; + + // global and local work dimensions for Horizontal pass + gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; + globalThreads[0] = gsize; + gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; + globalThreads[1] = gsize; + localThreads[0] = GROUPSIZE_X; + localThreads[1] = GROUPSIZE_Y; + + // Horizontal Pass + if (xp > 31 || xn > 31) { + // Generic case. 
+ rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeHor", &status); + + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), (const void *)&xn); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), (const void *)&wpl); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(isAsymmetric), + (const void *)&isAsymmetric); + status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(rwmask), + (const void *)&rwmask); + status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(lwmask), + (const void *)&lwmask); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); + + if (yp > 0 || yn > 0) { + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; + } + } else if (xp > 0 || xn > 0) { + rEnv.mpkKernel = + clCreateKernel(rEnv.mpkProgram, "morphoErodeHor_32word", &status); + isEven = (xp != xn); + + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), (const void *)&wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), + (const void *)&isAsymmetric); + status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(rwmask), + (const void *)&rwmask); + status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(lwmask), + (const void *)&lwmask); + status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(isEven), + (const void *)&isEven); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, 
rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); + + if (yp > 0 || yn > 0) { + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; } + } - //Vertical Pass - if (yp > 0 || yn > 0) - { - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeVer", &status ); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(yp), - (const void *)&yp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(wpl), - (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(h), - (const void *)&h); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(isAsymmetric), - (const void *)&isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, - 6, - sizeof(yn), - (const void *)&yn); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); - } + // Vertical Pass + if (yp > 0 || yn > 0) { + rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeVer", &status); + + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), (const void *)&yp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), (const void *)&wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), + (const void *)&isAsymmetric); + status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(yn), (const void *)&yn); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); + } - return status; + return status; } // OpenCL implementation of Morphology Dilate //Note: Assumes the source and dest 
opencl buffer are initialized. No check done -PIX* -OpenclDevice::pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy = false) -{ - l_uint32 wpl, h; +PIX *OpenclDevice::pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, + l_int32 vsize, bool reqDataCopy = false) { + l_uint32 wpl, h; - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); - clStatus = pixDilateCL(hsize, vsize, wpl, h); + clStatus = pixDilateCL(hsize, vsize, wpl, h); - if (reqDataCopy) - { - pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ, false); - } + if (reqDataCopy) { + pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, + CL_MAP_READ, false); + } - return pixd; + return pixd; } // OpenCL implementation of Morphology Erode //Note: Assumes the source and dest opencl buffer are initialized. No check done -PIX* -OpenclDevice::pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy = false) -{ - l_uint32 wpl, h; +PIX *OpenclDevice::pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, + l_int32 vsize, bool reqDataCopy = false) { + l_uint32 wpl, h; - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); - clStatus = pixErodeCL(hsize, vsize, wpl, h); + clStatus = pixErodeCL(hsize, vsize, wpl, h); - if (reqDataCopy) - { - pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ); - } + if (reqDataCopy) { + pixd = + mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ); + } - return pixd; + return pixd; } //Morphology Open operation. Invokes the relevant OpenCL kernels @@ -2055,50 +1886,40 @@ pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) // OpenCL implementation of Morphology Close //Note: Assumes the source and dest opencl buffer are initialized. 
No check done -PIX* -OpenclDevice::pixCloseBrickCL(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize, - bool reqDataCopy = false) -{ - l_uint32 wpl, h; +PIX *OpenclDevice::pixCloseBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, + l_int32 vsize, bool reqDataCopy = false) { + l_uint32 wpl, h; - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); - clStatus = pixCloseCL(hsize, vsize, wpl, h); + clStatus = pixCloseCL(hsize, vsize, wpl, h); - if (reqDataCopy) - { - pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ); - } + if (reqDataCopy) { + pixd = + mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ); + } - return pixd; + return pixd; } // OpenCL implementation of Morphology Open //Note: Assumes the source and dest opencl buffer are initialized. No check done -PIX* -OpenclDevice::pixOpenBrickCL(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize, - bool reqDataCopy = false) -{ - l_uint32 wpl, h; +PIX *OpenclDevice::pixOpenBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, + l_int32 vsize, bool reqDataCopy = false) { + l_uint32 wpl, h; - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); - clStatus = pixOpenCL(hsize, vsize, wpl, h); + clStatus = pixOpenCL(hsize, vsize, wpl, h); - if (reqDataCopy) - { - pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ); - } + if (reqDataCopy) { + pixd = + mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ); + } - return pixd; + return pixd; } //pix OR operation: outbuffer = buffer1 | buffer2 @@ -2262,19 +2083,16 @@ pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_ // OpenCL implementation of Subtract pix //Note: Assumes the source and dest opencl buffer are initialized. 
No check done -PIX* -OpenclDevice::pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2, bool reqDataCopy = false) -{ - l_uint32 wpl, h; +PIX *OpenclDevice::pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2, + bool reqDataCopy = false) { + l_uint32 wpl, h; - PROCNAME("pixSubtractCL"); + PROCNAME("pixSubtractCL"); - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixGetDepth(pixs1) != pixGetDepth(pixs2)) - return (PIX *)ERROR_PTR("depths of pixs* unequal", procName, pixd); + if (!pixs1) return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); + if (!pixs2) return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); + if (pixGetDepth(pixs1) != pixGetDepth(pixs2)) + return (PIX *)ERROR_PTR("depths of pixs* unequal", procName, pixd); #if EQUAL_SIZE_WARNING if (!pixSizesEqual(pixs1, pixs2)) @@ -2297,200 +2115,207 @@ OpenclDevice::pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2, bool reqDataCo // OpenCL implementation of Hollow pix //Note: Assumes the source and dest opencl buffer are initialized. 
No check done -PIX* -OpenclDevice::pixHollowCL(PIX *pixd, - PIX *pixs, - l_int32 close_hsize, - l_int32 close_vsize, - l_int32 open_hsize, - l_int32 open_vsize, - bool reqDataCopy = false) -{ - l_uint32 wpl, h; - cl_mem pixtemp; - - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); - - //First step : Close Morph operation: Dilate followed by Erode - clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h); - - //Store the output of close operation in an intermediate buffer - //this will be later used for pixsubtract - clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, sizeof(int) * wpl*h, 0, NULL, NULL); - - //Second step: Open Operation - Erode followed by Dilate - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h); - - //Third step: Subtract : (Close - Open) - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixdCLIntermediate; - pixdCLIntermediate = pixtemp; - - clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer); - - if (reqDataCopy) - { - //Read back output data from OCL buffer to cpu - pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ); - } - return pixd; +PIX *OpenclDevice::pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize, + l_int32 close_vsize, l_int32 open_hsize, + l_int32 open_vsize, bool reqDataCopy = false) { + l_uint32 wpl, h; + cl_mem pixtemp; + + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); + + // First step : Close Morph operation: Dilate followed by Erode + clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h); + + // Store the output of close operation in an intermediate buffer + // this will be later used for pixsubtract + clStatus = + clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, + 0, sizeof(int) * wpl * h, 0, NULL, NULL); + + // Second step: Open Operation - Erode followed by Dilate + pixtemp = pixsCLBuffer; + pixsCLBuffer 
= pixdCLBuffer; + pixdCLBuffer = pixtemp; + + clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h); + + // Third step: Subtract : (Close - Open) + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixdCLIntermediate; + pixdCLIntermediate = pixtemp; + + clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer); + + if (reqDataCopy) { + // Read back output data from OCL buffer to cpu + pixd = + mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ); + } + return pixd; } // OpenCL implementation of Get Lines from pix function //Note: Assumes the source and dest opencl buffer are initialized. No check done -void -OpenclDevice::pixGetLinesCL(PIX *pixd, - PIX *pixs, - PIX** pix_vline, - PIX** pix_hline, - PIX** pixClosed, - bool getpixClosed, - l_int32 close_hsize, l_int32 close_vsize, - l_int32 open_hsize, l_int32 open_vsize, - l_int32 line_hsize, l_int32 line_vsize) -{ - l_uint32 wpl, h; - cl_mem pixtemp; - - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); - - //First step : Close Morph operation: Dilate followed by Erode - clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h); - - //Copy the Close output to CPU buffer - if (getpixClosed) - { - *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs, wpl*h, CL_MAP_READ, true, false); - } +void OpenclDevice::pixGetLinesCL(PIX *pixd, PIX *pixs, PIX **pix_vline, + PIX **pix_hline, PIX **pixClosed, + bool getpixClosed, l_int32 close_hsize, + l_int32 close_vsize, l_int32 open_hsize, + l_int32 open_vsize, l_int32 line_hsize, + l_int32 line_vsize) { + l_uint32 wpl, h; + cl_mem pixtemp; + + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); + + // First step : Close Morph operation: Dilate followed by Erode + clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h); + + // Copy the Close output to CPU buffer + if (getpixClosed) { + *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs, + wpl * h, CL_MAP_READ, true, false); + } - //Store the output of 
close operation in an intermediate buffer - //this will be later used for pixsubtract - clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, sizeof(int) * wpl*h, 0, NULL, NULL); + // Store the output of close operation in an intermediate buffer + // this will be later used for pixsubtract + clStatus = + clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, + 0, sizeof(int) * wpl * h, 0, NULL, NULL); - //Second step: Open Operation - Erode followed by Dilate - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; + // Second step: Open Operation - Erode followed by Dilate + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; - clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h); + clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h); - //Third step: Subtract : (Close - Open) - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixdCLIntermediate; - pixdCLIntermediate = pixtemp; + // Third step: Subtract : (Close - Open) + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixdCLIntermediate; + pixdCLIntermediate = pixtemp; - clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer); + clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer); - //Store the output of Hollow operation in an intermediate buffer - //this will be later used - clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, sizeof(int) * wpl*h, 0, NULL, NULL); + // Store the output of Hollow operation in an intermediate buffer + // this will be later used + clStatus = + clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, + 0, sizeof(int) * wpl * h, 0, NULL, NULL); - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; - //Fourth step: Get vertical line - 
//pixOpenBrick(NULL, pix_hollow, 1, min_line_length); - clStatus = pixOpenCL(1, line_vsize, wpl, h); + // Fourth step: Get vertical line + // pixOpenBrick(NULL, pix_hollow, 1, min_line_length); + clStatus = pixOpenCL(1, line_vsize, wpl, h); - //Copy the vertical line output to CPU buffer - *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl*h, CL_MAP_READ, true, false); + // Copy the vertical line output to CPU buffer + *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl * h, + CL_MAP_READ, true, false); - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLIntermediate; - pixdCLIntermediate = pixtemp; + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLIntermediate; + pixdCLIntermediate = pixtemp; - //Fifth step: Get horizontal line - //pixOpenBrick(NULL, pix_hollow, min_line_length, 1); - clStatus = pixOpenCL(line_hsize, 1, wpl, h); + // Fifth step: Get horizontal line + // pixOpenBrick(NULL, pix_hollow, min_line_length, 1); + clStatus = pixOpenCL(line_hsize, 1, wpl, h); - //Copy the horizontal line output to CPU buffer - *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl*h, CL_MAP_READ, true, true); + // Copy the horizontal line output to CPU buffer + *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl * h, + CL_MAP_READ, true, true); - return; + return; } - /************************************************************************* * HistogramRect * Otsu Thresholding Operations * histogramAllChannels is laid out as all channel 0, then all channel 1... 
* only supports 1 or 4 channels (bytes_per_pixel) ************************************************************************/ -int OpenclDevice::HistogramRectOCL( - const unsigned char* imageData, - int bytes_per_pixel, - int bytes_per_line, - int left, // always 0 - int top, // always 0 - int width, - int height, - int kHistogramSize, - int* histogramAllChannels) -{ -PERF_COUNT_START("HistogramRectOCL") - cl_int clStatus; - int retVal= 0; - KernelEnv histKern; - SetKernelEnv( &histKern ); - KernelEnv histRedKern; - SetKernelEnv( &histRedKern ); - /* map imagedata to device as read only */ - // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be coherent which we don't need. - // faster option would be to allocate initial image buffer - // using a garlic bus memory type - cl_mem imageBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width*height*bytes_per_pixel*sizeof(char), (void *)imageData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer"); - - /* setup work group size parameters */ - int block_size = 256; - cl_uint numCUs; - clStatus = clGetDeviceInfo( gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, NULL); - CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer"); - - int requestedOccupancy = 10; - int numWorkGroups = numCUs * requestedOccupancy; - int numThreads = block_size*numWorkGroups; - size_t local_work_size[] = {static_cast(block_size)}; - size_t global_work_size[] = {static_cast(numThreads)}; - size_t red_global_work_size[] = {static_cast(block_size*kHistogramSize*bytes_per_pixel)}; - - /* map histogramAllChannels as write only */ - int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups; - - cl_mem histogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, kHistogramSize*bytes_per_pixel*sizeof(int), (void *)histogramAllChannels, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer histogramBuffer"); - - /* 
intermediate histogram buffer */ - int histRed = 256; - int tmpHistogramBins = kHistogramSize*bytes_per_pixel*histRed; - - cl_mem tmpHistogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE, tmpHistogramBins*sizeof(cl_uint), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer tmpHistogramBuffer"); - - /* atomic sync buffer */ - int *zeroBuffer = new int[1]; - zeroBuffer[0] = 0; - cl_mem atomicSyncBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int), (void *)zeroBuffer, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer atomicSyncBuffer"); - delete[] zeroBuffer; - //Create kernel objects based on bytes_per_pixel - if (bytes_per_pixel == 1) - { - histKern.mpkKernel = clCreateKernel( histKern.mpkProgram, "kernel_HistogramRectOneChannel", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectOneChannel"); - - histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram, "kernel_HistogramRectOneChannelReduction", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectOneChannelReduction"); - } else { +int OpenclDevice::HistogramRectOCL(const unsigned char *imageData, + int bytes_per_pixel, int bytes_per_line, + int left, // always 0 + int top, // always 0 + int width, int height, int kHistogramSize, + int *histogramAllChannels) { + PERF_COUNT_START("HistogramRectOCL") + cl_int clStatus; + int retVal = 0; + KernelEnv histKern; + SetKernelEnv(&histKern); + KernelEnv histRedKern; + SetKernelEnv(&histRedKern); + /* map imagedata to device as read only */ + // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be + // coherent which we don't need. 
+ // faster option would be to allocate initial image buffer + // using a garlic bus memory type + cl_mem imageBuffer = clCreateBuffer( + histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + width * height * bytes_per_pixel * sizeof(char), (void *)imageData, + &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); + + /* setup work group size parameters */ + int block_size = 256; + cl_uint numCUs; + clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(numCUs), &numCUs, NULL); + CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); + + int requestedOccupancy = 10; + int numWorkGroups = numCUs * requestedOccupancy; + int numThreads = block_size * numWorkGroups; + size_t local_work_size[] = {static_cast(block_size)}; + size_t global_work_size[] = {static_cast(numThreads)}; + size_t red_global_work_size[] = { + static_cast(block_size * kHistogramSize * bytes_per_pixel)}; + + /* map histogramAllChannels as write only */ + int numBins = kHistogramSize * bytes_per_pixel * numWorkGroups; + + cl_mem histogramBuffer = clCreateBuffer( + histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + kHistogramSize * bytes_per_pixel * sizeof(int), + (void *)histogramAllChannels, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer histogramBuffer"); + + /* intermediate histogram buffer */ + int histRed = 256; + int tmpHistogramBins = kHistogramSize * bytes_per_pixel * histRed; + + cl_mem tmpHistogramBuffer = + clCreateBuffer(histKern.mpkContext, CL_MEM_READ_WRITE, + tmpHistogramBins * sizeof(cl_uint), NULL, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer tmpHistogramBuffer"); + + /* atomic sync buffer */ + int *zeroBuffer = new int[1]; + zeroBuffer[0] = 0; + cl_mem atomicSyncBuffer = clCreateBuffer( + histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + sizeof(cl_int), (void *)zeroBuffer, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer atomicSyncBuffer"); + delete[] zeroBuffer; + // Create kernel objects 
based on bytes_per_pixel + if (bytes_per_pixel == 1) { + histKern.mpkKernel = clCreateKernel( + histKern.mpkProgram, "kernel_HistogramRectOneChannel", &clStatus); + CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectOneChannel"); + + histRedKern.mpkKernel = + clCreateKernel(histRedKern.mpkProgram, + "kernel_HistogramRectOneChannelReduction", &clStatus); + CHECK_OPENCL(clStatus, + "clCreateKernel kernel_HistogramRectOneChannelReduction"); + } else { histKern.mpkKernel = clCreateKernel( histKern.mpkProgram, "kernel_HistogramRectAllChannels", &clStatus ); CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectAllChannels"); @@ -2534,10 +2359,9 @@ PERF_COUNT_SUB("before") 0, NULL, NULL ); CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels" ); clFinish( histKern.mpkCmdQueue ); - if(clStatus !=0) - { - retVal = -1; - } + if (clStatus != 0) { + retVal = -1; + } /* launch histogram */ clStatus = clEnqueueNDRangeKernel( histRedKern.mpkCmdQueue, @@ -2546,27 +2370,24 @@ PERF_COUNT_SUB("before") 0, NULL, NULL ); CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction" ); clFinish( histRedKern.mpkCmdQueue ); - if(clStatus !=0) - { - retVal = -1; - } -PERF_COUNT_SUB("redKernel") + if (clStatus != 0) { + retVal = -1; + } + PERF_COUNT_SUB("redKernel") /* map results back from gpu */ ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, CL_MAP_READ, 0, kHistogramSize*bytes_per_pixel*sizeof(int), 0, NULL, NULL, &clStatus); CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer"); - if(clStatus !=0) - { - retVal = -1; - } + if (clStatus != 0) { + retVal = -1; + } clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, NULL, NULL); clReleaseMemObject(histogramBuffer); clReleaseMemObject(imageBuffer); PERF_COUNT_SUB("after") PERF_COUNT_END - return retVal; - +return retVal; } /************************************************************************* @@ -2574,111 
+2395,122 @@ PERF_COUNT_END * from the class, using thresholds/hi_values to the output IMAGE. * only supports 1 or 4 channels ************************************************************************/ -int OpenclDevice::ThresholdRectToPixOCL( - const unsigned char* imageData, - int bytes_per_pixel, - int bytes_per_line, - const int* thresholds, - const int* hi_values, - Pix** pix, - int height, - int width, - int top, - int left) { -PERF_COUNT_START("ThresholdRectToPixOCL") - int retVal =0; - /* create pix result buffer */ - *pix = pixCreate(width, height, 1); - uinT32* pixData = pixGetData(*pix); - int wpl = pixGetWpl(*pix); - int pixSize = wpl*height*sizeof(uinT32); // number of pixels - - cl_int clStatus; - KernelEnv rEnv; - SetKernelEnv( &rEnv ); - - /* setup work group size parameters */ - int block_size = 256; - cl_uint numCUs = 6; - clStatus = clGetDeviceInfo( gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, NULL); - CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer"); - - int requestedOccupancy = 10; - int numWorkGroups = numCUs * requestedOccupancy; - int numThreads = block_size*numWorkGroups; - size_t local_work_size[] = {(size_t) block_size}; - size_t global_work_size[] = {(size_t) numThreads}; - - /* map imagedata to device as read only */ - // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be coherent which we don't need. 
- // faster option would be to allocate initial image buffer - // using a garlic bus memory type - cl_mem imageBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width*height*bytes_per_pixel*sizeof(char), (void *)imageData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer"); - - /* map pix as write only */ - pixThBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, pixSize, (void *)pixData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer pix"); - - /* map thresholds and hi_values */ - cl_mem thresholdsBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bytes_per_pixel*sizeof(int), (void *)thresholds, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer thresholdBuffer"); - cl_mem hiValuesBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bytes_per_pixel*sizeof(int), (void *)hi_values, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer hiValuesBuffer"); - - /* compile kernel */ - if (bytes_per_pixel == 4) { - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "kernel_ThresholdRectToPix", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_ThresholdRectToPix"); - } else { - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "kernel_ThresholdRectToPix_OneChan", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_ThresholdRectToPix_OneChan"); - } +int OpenclDevice::ThresholdRectToPixOCL(const unsigned char *imageData, + int bytes_per_pixel, int bytes_per_line, + const int *thresholds, + const int *hi_values, Pix **pix, + int height, int width, int top, + int left) { + PERF_COUNT_START("ThresholdRectToPixOCL") + int retVal = 0; + /* create pix result buffer */ + *pix = pixCreate(width, height, 1); + uinT32 *pixData = pixGetData(*pix); + int wpl = pixGetWpl(*pix); + int pixSize = wpl * height * sizeof(uinT32); // number of pixels + + cl_int clStatus; + KernelEnv rEnv; + SetKernelEnv(&rEnv); + + /* setup 
work group size parameters */ + int block_size = 256; + cl_uint numCUs = 6; + clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(numCUs), &numCUs, NULL); + CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); + + int requestedOccupancy = 10; + int numWorkGroups = numCUs * requestedOccupancy; + int numThreads = block_size * numWorkGroups; + size_t local_work_size[] = {(size_t)block_size}; + size_t global_work_size[] = {(size_t)numThreads}; + + /* map imagedata to device as read only */ + // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be + // coherent which we don't need. + // faster option would be to allocate initial image buffer + // using a garlic bus memory type + cl_mem imageBuffer = + clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + width * height * bytes_per_pixel * sizeof(char), + (void *)imageData, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); + + /* map pix as write only */ + pixThBuffer = + clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + pixSize, (void *)pixData, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer pix"); + + /* map thresholds and hi_values */ + cl_mem thresholdsBuffer = clCreateBuffer( + rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + bytes_per_pixel * sizeof(int), (void *)thresholds, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer thresholdBuffer"); + cl_mem hiValuesBuffer = clCreateBuffer( + rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + bytes_per_pixel * sizeof(int), (void *)hi_values, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer hiValuesBuffer"); + + /* compile kernel */ + if (bytes_per_pixel == 4) { + rEnv.mpkKernel = + clCreateKernel(rEnv.mpkProgram, "kernel_ThresholdRectToPix", &clStatus); + CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix"); + } else { + rEnv.mpkKernel = clCreateKernel( + rEnv.mpkProgram, "kernel_ThresholdRectToPix_OneChan", 
&clStatus); + CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix_OneChan"); + } - /* set kernel arguments */ - clStatus = clSetKernelArg( rEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&imageBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer"); - cl_uint numPixels = width*height; - clStatus = clSetKernelArg( rEnv.mpkKernel, 1, sizeof(int), (void *)&height ); - CHECK_OPENCL( clStatus, "clSetKernelArg height" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 2, sizeof(int), (void *)&width ); - CHECK_OPENCL( clStatus, "clSetKernelArg width" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 3, sizeof(int), (void *)&wpl ); - CHECK_OPENCL( clStatus, "clSetKernelArg wpl" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 4, sizeof(cl_mem), (void *)&thresholdsBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg thresholdsBuffer" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 5, sizeof(cl_mem), (void *)&hiValuesBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg hiValuesBuffer" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 6, sizeof(cl_mem), (void *)&pixThBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg pixThBuffer"); - - /* launch kernel & wait */ -PERF_COUNT_SUB("before") - clStatus = clEnqueueNDRangeKernel( - rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 1, NULL, global_work_size, local_work_size, - 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix" ); - clFinish( rEnv.mpkCmdQueue ); -PERF_COUNT_SUB("kernel") - if(clStatus !=0) - { - printf("Setting return value to -1\n"); - retVal = -1; - } - /* map results back from gpu */ - void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, pixSize, 0, NULL, NULL, &clStatus); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL, NULL); + /* set kernel arguments */ + clStatus = + clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&imageBuffer); + 
CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); + cl_uint numPixels = width * height; + clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(int), (void *)&height); + CHECK_OPENCL(clStatus, "clSetKernelArg height"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(int), (void *)&width); + CHECK_OPENCL(clStatus, "clSetKernelArg width"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(int), (void *)&wpl); + CHECK_OPENCL(clStatus, "clSetKernelArg wpl"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), + (void *)&thresholdsBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg thresholdsBuffer"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(cl_mem), + (void *)&hiValuesBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg hiValuesBuffer"); + clStatus = + clSetKernelArg(rEnv.mpkKernel, 6, sizeof(cl_mem), (void *)&pixThBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg pixThBuffer"); + + /* launch kernel & wait */ + PERF_COUNT_SUB("before") + clStatus = + clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 1, NULL, + global_work_size, local_work_size, 0, NULL, NULL); + CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix"); + clFinish(rEnv.mpkCmdQueue); + PERF_COUNT_SUB("kernel") + if (clStatus != 0) { + printf("Setting return value to -1\n"); + retVal = -1; + } + /* map results back from gpu */ + void *ptr = + clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, + pixSize, 0, NULL, NULL, &clStatus); + CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer"); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL, NULL); - clReleaseMemObject(imageBuffer); - clReleaseMemObject(thresholdsBuffer); - clReleaseMemObject(hiValuesBuffer); + clReleaseMemObject(imageBuffer); + clReleaseMemObject(thresholdsBuffer); + clReleaseMemObject(hiValuesBuffer); -PERF_COUNT_SUB("after") -PERF_COUNT_END -return retVal; + PERF_COUNT_SUB("after") + PERF_COUNT_END + return retVal; } @@ -2755,7 +2587,6 
@@ void populateTessScoreEvaluationInputData( TessScoreEvaluationInputData *input ) float fractionBlack = 0.1; // how much of the image should be blackened int numSpots = (height*width)*fractionBlack/(maxLineWidth*maxLineWidth/2/2); for (int i = 0; i < numSpots; i++) { - int lineWidth = rand()%maxLineWidth; int col = lineWidth + rand()%(width-2*lineWidth); int row = lineWidth + rand()%(height-2*lineWidth); @@ -2786,15 +2617,14 @@ typedef struct _TessDeviceScore { *****************************************************************************/ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) { - double time = 0; #if ON_WINDOWS LARGE_INTEGER freq, time_funct_start, time_funct_end; QueryPerformanceFrequency(&freq); #elif ON_APPLE - mach_timebase_info_data_t info = { 0, 0 }; + mach_timebase_info_data_t info = {0, 0}; mach_timebase_info(&info); - long long start,stop; + long long start, stop; #else timespec time_funct_start, time_funct_end; #endif @@ -2805,8 +2635,8 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif @@ -2817,9 +2647,9 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - 
time_funct_start.tv_nsec)/1000000000.0; @@ -2828,8 +2658,8 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu } else { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif @@ -2842,7 +2672,6 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu int idx = 0; for (i = 0; i < input.height ; i++) { for (j = 0; j < input.width; j++) { - l_uint32 tiffword = tiffdata[i * input.width + j]; l_int32 rval = ((tiffword) & 0xff); l_int32 gval = (((tiffword) >> 8) & 0xff); @@ -2855,9 +2684,9 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; @@ -2872,15 +2701,14 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu } double histogramRectMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) { - double time; #if ON_WINDOWS LARGE_INTEGER freq, time_funct_start, time_funct_end; QueryPerformanceFrequency(&freq); -#elif ON_APPLE - mach_timebase_info_data_t info = { 0, 0 }; +#elif ON_APPLE + mach_timebase_info_data_t info = {0, 0}; mach_timebase_info(&info); - long long start,stop; + long long start, stop; #else timespec time_funct_start, time_funct_end; #endif @@ -2892,58 +2720,56 @@ double histogramRectMicroBench( 
GPUEnv *env, TessScoreEvaluationInputData input, int kHistogramSize = 256; int bytes_per_line = input.width*input.numChannels; int *histogramAllChannels = new int[kHistogramSize*input.numChannels]; - int retVal= 0; + int retVal = 0; // function call if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif OpenclDevice::gpuEnv = *env; int wpl = pixGetWpl(input.pix); - retVal= OpenclDevice::HistogramRectOCL(input.imageData, input.numChannels, bytes_per_line, top, left, input.width, input.height, kHistogramSize, histogramAllChannels); + retVal = OpenclDevice::HistogramRectOCL( + input.imageData, input.numChannels, bytes_per_line, top, left, + input.width, input.height, kHistogramSize, histogramAllChannels); #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - if(retVal ==0) - { - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; - } - else - { - time= FLT_MAX; - } +#elif ON_APPLE + stop = mach_absolute_time(); + if (retVal == 0) { + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + } else { + time = FLT_MAX; + } #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; #endif } else { - int *histogram = new int[kHistogramSize]; #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif for (int ch = 0; ch < input.numChannels; ++ch) { - tesseract::HistogramRect(input.pix, input.numChannels, - left, top, 
input.width, input.height, histogram); + tesseract::HistogramRect(input.pix, input.numChannels, left, top, + input.width, input.height, histogram); } #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; @@ -2995,16 +2821,15 @@ void ThresholdRectToPix_Native(const unsigned char* imagedata, } double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) { - double time; - int retVal =0; + int retVal = 0; #if ON_WINDOWS LARGE_INTEGER freq, time_funct_start, time_funct_end; QueryPerformanceFrequency(&freq); -#elif ON_APPLE - mach_timebase_info_data_t info = { 0, 0 }; +#elif ON_APPLE + mach_timebase_info_data_t info = {0, 0}; mach_timebase_info(&info); - long long start,stop; + long long start, stop; #else timespec time_funct_start, time_funct_end; #endif @@ -3030,29 +2855,29 @@ double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData i if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif OpenclDevice::gpuEnv = *env; int wpl = pixGetWpl(input.pix); - retVal= OpenclDevice::ThresholdRectToPixOCL(input.imageData, input.numChannels, bytes_per_line, thresholds, hi_values, &input.pix, input.height, input.width, top, left); + retVal = OpenclDevice::ThresholdRectToPixOCL( + input.imageData, input.numChannels, 
bytes_per_line, thresholds, + hi_values, &input.pix, input.height, input.width, top, left); #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - if(retVal ==0) - { - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9;; - } - else - { - time= FLT_MAX; - } +#elif ON_APPLE + stop = mach_absolute_time(); + if (retVal == 0) { + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + ; + } else { + time = FLT_MAX; + } #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); @@ -3065,8 +2890,8 @@ double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData i thresholder.SetImage( input.pix ); #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif @@ -3076,9 +2901,9 @@ double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData i #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; @@ -3097,10 +2922,10 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in #if ON_WINDOWS LARGE_INTEGER freq, time_funct_start, time_funct_end; QueryPerformanceFrequency(&freq); -#elif ON_APPLE - mach_timebase_info_data_t info = { 0, 0 }; +#elif ON_APPLE + mach_timebase_info_data_t info = {0, 0}; 
mach_timebase_info(&info); - long long start,stop; + long long start, stop; #else timespec time_funct_start, time_funct_end; #endif @@ -3118,8 +2943,8 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif @@ -3134,9 +2959,9 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; @@ -3144,8 +2969,8 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in } else { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif @@ -3163,9 +2988,9 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( 
CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; @@ -3183,7 +3008,6 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in #include "stdlib.h" - // encode score object as byte string ds_status serializeScore( ds_device* device, void **serializedScore, unsigned int* serializedScoreSize ) { *serializedScoreSize = sizeof(TessDeviceScore); @@ -3200,14 +3024,13 @@ ds_status deserializeScore( ds_device* device, const unsigned char* serializedSc return DS_SUCCESS; } -ds_status releaseScore( void* score ) { +ds_status releaseScore(void *score) { delete (TessDeviceScore *)score; return DS_SUCCESS; } // evaluate devices ds_status evaluateScoreForDevice( ds_device *device, void *inputData) { - // overwrite statuc gpuEnv w/ current device // so native opencl calls can be used; they use static gpuEnv printf("\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ? 
"OpenCL" : "Native" ); @@ -3243,14 +3066,12 @@ ds_status evaluateScoreForDevice( ds_device *device, void *inputData) { float composeRGBPixelWeight = 1.2f; float histogramRectWeight = 2.4f; float thresholdRectToPixWeight = 4.5f; - float getLineMasksMorphWeight = 5.0f; - - float weightedTime = - composeRGBPixelWeight * composeRGBPixelTime + - histogramRectWeight * histogramRectTime + - thresholdRectToPixWeight * thresholdRectToPixTime + - getLineMasksMorphWeight * getLineMasksMorphTime - ; + float getLineMasksMorphWeight = 5.0f; + + float weightedTime = composeRGBPixelWeight * composeRGBPixelTime + + histogramRectWeight * histogramRectTime + + thresholdRectToPixWeight * thresholdRectToPixTime + + getLineMasksMorphWeight * getLineMasksMorphTime; device->score = (void *)new TessDeviceScore; ((TessDeviceScore *)device->score)->time = weightedTime; @@ -3266,83 +3087,105 @@ ds_status evaluateScoreForDevice( ds_device *device, void *inputData) { // initial call to select device ds_device OpenclDevice::getDeviceSelection( ) { if (!deviceIsSelected) { -PERF_COUNT_START("getDeviceSelection") - // check if opencl is available at runtime - if( 1 == LoadOpencl() ) { - // opencl is available -//PERF_COUNT_SUB("LoadOpencl") - // setup devices - ds_status status; - ds_profile *profile; - status = initDSProfile( &profile, "v0.1" ); -PERF_COUNT_SUB("initDSProfile") - // try reading scores from file - const char *fileName = "tesseract_opencl_profile_devices.dat"; - status = readProfileFromFile( profile, deserializeScore, fileName); - if (status != DS_SUCCESS) { - // need to run evaluation - printf("[DS] Profile file not available (%s); performing profiling.\n", fileName); - - // create input data - TessScoreEvaluationInputData input; - populateTessScoreEvaluationInputData( &input ); -//PERF_COUNT_SUB("populateTessScoreEvaluationInputData") - // perform evaluations - unsigned int numUpdates; - status = profileDevices( profile, DS_EVALUATE_ALL, evaluateScoreForDevice, (void *)&input, 
&numUpdates ); -PERF_COUNT_SUB("profileDevices") - // write scores to file - if ( status == DS_SUCCESS ) { - status = writeProfileToFile( profile, serializeScore, fileName); -PERF_COUNT_SUB("writeProfileToFile") - if ( status == DS_SUCCESS ) { - printf("[DS] Scores written to file (%s).\n", fileName); + PERF_COUNT_START("getDeviceSelection") + // check if opencl is available at runtime + if (1 == LoadOpencl()) { + // opencl is available + // PERF_COUNT_SUB("LoadOpencl") + // setup devices + ds_status status; + ds_profile *profile; + status = initDSProfile(&profile, "v0.1"); + PERF_COUNT_SUB("initDSProfile") + // try reading scores from file + const char *fileName = "tesseract_opencl_profile_devices.dat"; + status = readProfileFromFile(profile, deserializeScore, fileName); + if (status != DS_SUCCESS) { + // need to run evaluation + printf("[DS] Profile file not available (%s); performing profiling.\n", + fileName); + + // create input data + TessScoreEvaluationInputData input; + populateTessScoreEvaluationInputData(&input); + // PERF_COUNT_SUB("populateTessScoreEvaluationInputData") + // perform evaluations + unsigned int numUpdates; + status = + profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, + (void *)&input, &numUpdates); + PERF_COUNT_SUB("profileDevices") + // write scores to file + if (status == DS_SUCCESS) { + status = writeProfileToFile(profile, serializeScore, fileName); + PERF_COUNT_SUB("writeProfileToFile") + if (status == DS_SUCCESS) { + printf("[DS] Scores written to file (%s).\n", fileName); + } else { + printf( + "[DS] Error saving scores to file (%s); scores not written to " + "file.\n", + fileName); + } } else { - printf("[DS] Error saving scores to file (%s); scores not written to file.\n", fileName); + printf( + "[DS] Unable to evaluate performance; scores not written to " + "file.\n"); } } else { - printf("[DS] Unable to evaluate performance; scores not written to file.\n"); + PERF_COUNT_SUB("readProfileFromFile") + printf("[DS] 
Profile read from file (%s).\n", fileName); } - } else { - -PERF_COUNT_SUB("readProfileFromFile") - printf("[DS] Profile read from file (%s).\n", fileName); - } - - // we now have device scores either from file or evaluation - // select fastest using custom Tesseract selection algorithm - float bestTime = FLT_MAX; // begin search with worst possible time - int bestDeviceIdx = -1; - for (int d = 0; d < profile->numDevices; d++) { - ds_device device = profile->devices[d]; - TessDeviceScore score = *(TessDeviceScore *)device.score; - float time = score.time; - printf("[DS] Device[%i] %i:%s score is %f\n", d+1, device.type, device.oclDeviceName, time); - if (time < bestTime) { - bestTime = time; + // we now have device scores either from file or evaluation + // select fastest using custom Tesseract selection algorithm + float bestTime = FLT_MAX; // begin search with worst possible time + int bestDeviceIdx = -1; + for (int d = 0; d < profile->numDevices; d++) { + ds_device device = profile->devices[d]; + TessDeviceScore score = *(TessDeviceScore *)device.score; + + float time = score.time; + printf("[DS] Device[%i] %i:%s score is %f\n", d + 1, device.type, + device.oclDeviceName, time); + if (time < bestTime) { + bestTime = time; bestDeviceIdx = d; + } } - } - printf("[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); - // cleanup - // TODO: call destructor for profile object? + printf("[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1, + profile->devices[bestDeviceIdx].oclDeviceName, + profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE + ? "OpenCL" + : "Native"); + // cleanup + // TODO: call destructor for profile object? 
bool overridden = false; char *overrideDeviceStr = getenv("TESSERACT_OPENCL_DEVICE"); if (overrideDeviceStr != NULL) { int overrideDeviceIdx = atoi(overrideDeviceStr); - if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices ) { - printf("[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, %i)\n", overrideDeviceStr, overrideDeviceIdx); + if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices) { + printf( + "[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, " + "%i)\n", + overrideDeviceStr, overrideDeviceIdx); bestDeviceIdx = overrideDeviceIdx - 1; overridden = true; } else { - printf("[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are valid devices).\n", overrideDeviceStr, profile->numDevices); + printf( + "[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are " + "valid devices).\n", + overrideDeviceStr, profile->numDevices); } } if (overridden) { - printf("[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); + printf("[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1, + profile->devices[bestDeviceIdx].oclDeviceName, + profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE + ? "OpenCL" + : "Native"); } selectedDevice = profile->devices[bestDeviceIdx]; // cleanup @@ -3357,10 +3200,10 @@ PERF_COUNT_SUB("readProfileFromFile") selectedDevice.oclDriverVersion = NULL; } deviceIsSelected = true; -PERF_COUNT_SUB("select from Profile") -PERF_COUNT_END + PERF_COUNT_SUB("select from Profile") + PERF_COUNT_END } -//PERF_COUNT_END + // PERF_COUNT_END return selectedDevice; } @@ -3375,8 +3218,6 @@ bool OpenclDevice::selectedDeviceIsNativeCPU() { return (device.type == DS_DEVICE_NATIVE_CPU); } - - /*! 
* pixConvertRGBToGray() from leptonica, converted to opencl kernel * @@ -3388,111 +3229,113 @@ bool OpenclDevice::selectedDeviceIsNativeCPU() { * Notes: * (1) Use a weighted average of the RGB values. */ -#define SET_DATA_BYTE( pdata, n, val ) (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val)) +#define SET_DATA_BYTE(pdata, n, val) \ + (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val)) -Pix * OpenclDevice::pixConvertRGBToGrayOCL( - Pix *srcPix, // 32-bit source - float rwt, - float gwt, - float bwt ) -{ -PERF_COUNT_START("pixConvertRGBToGrayOCL") - Pix *dstPix; // 8-bit destination +Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix, // 32-bit source + float rwt, float gwt, float bwt) { + PERF_COUNT_START("pixConvertRGBToGrayOCL") + Pix *dstPix; // 8-bit destination - if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) return NULL; + if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) return NULL; - if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) { - // magic numbers from leptonica - rwt = 0.3; - gwt = 0.5; - bwt = 0.2; - } - // normalize - float sum = rwt + gwt + bwt; - rwt /= sum; - gwt /= sum; - bwt /= sum; - - // source pix - int w, h; - pixGetDimensions(srcPix, &w, &h, NULL); - //printf("Image is %i x %i\n", w, h); - unsigned int *srcData = pixGetData(srcPix); - int srcWPL = pixGetWpl(srcPix); - int srcSize = srcWPL * h * sizeof(unsigned int); - - // destination pix - if ((dstPix = pixCreate(w, h, 8)) == NULL) - return NULL; - pixCopyResolution(dstPix, srcPix); - unsigned int *dstData = pixGetData(dstPix); - int dstWPL = pixGetWpl(dstPix); - int dstWords = dstWPL * h; - int dstSize = dstWords * sizeof(unsigned int); - //printf("dstSize = %i\n", dstSize); -PERF_COUNT_SUB("pix setup") - - // opencl objects - cl_int clStatus; - KernelEnv kEnv; - SetKernelEnv( &kEnv ); - - // source buffer - cl_mem srcBuffer = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, srcSize, (void *)srcData, &clStatus ); - CHECK_OPENCL( clStatus, 
"clCreateBuffer srcBuffer"); - - // destination buffer - cl_mem dstBuffer = clCreateBuffer( kEnv.mpkContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, dstSize, (void *)dstData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer dstBuffer"); - - // setup work group size parameters - int block_size = 256; - int numWorkGroups = ((h*w+block_size-1) / block_size ); - int numThreads = block_size*numWorkGroups; - size_t local_work_size[] = {static_cast(block_size)}; - size_t global_work_size[] = {static_cast(numThreads)}; - //printf("Enqueueing %i threads for %i output pixels\n", numThreads, w*h); - - /* compile kernel */ - kEnv.mpkKernel = clCreateKernel( kEnv.mpkProgram, "kernel_RGBToGray", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_RGBToGray"); - - - /* set kernel arguments */ - clStatus = clSetKernelArg( kEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&srcBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg srcBuffer"); - clStatus = clSetKernelArg( kEnv.mpkKernel, 1, sizeof(cl_mem), (void *)&dstBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg dstBuffer"); - clStatus = clSetKernelArg( kEnv.mpkKernel, 2, sizeof(int), (void *)&srcWPL ); - CHECK_OPENCL( clStatus, "clSetKernelArg srcWPL" ); - clStatus = clSetKernelArg( kEnv.mpkKernel, 3, sizeof(int), (void *)&dstWPL ); - CHECK_OPENCL( clStatus, "clSetKernelArg dstWPL" ); - clStatus = clSetKernelArg( kEnv.mpkKernel, 4, sizeof(int), (void *)&h ); - CHECK_OPENCL( clStatus, "clSetKernelArg height" ); - clStatus = clSetKernelArg( kEnv.mpkKernel, 5, sizeof(int), (void *)&w ); - CHECK_OPENCL( clStatus, "clSetKernelArg width" ); - clStatus = clSetKernelArg( kEnv.mpkKernel, 6, sizeof(float), (void *)&rwt ); - CHECK_OPENCL( clStatus, "clSetKernelArg rwt" ); - clStatus = clSetKernelArg( kEnv.mpkKernel, 7, sizeof(float), (void *)&gwt ); - CHECK_OPENCL( clStatus, "clSetKernelArg gwt"); - clStatus = clSetKernelArg( kEnv.mpkKernel, 8, sizeof(float), (void *)&bwt ); - CHECK_OPENCL( clStatus, "clSetKernelArg bwt"); - - 
/* launch kernel & wait */ -PERF_COUNT_SUB("before") - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, - kEnv.mpkKernel, - 1, NULL, global_work_size, local_work_size, - 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_RGBToGray" ); - clFinish( kEnv.mpkCmdQueue ); -PERF_COUNT_SUB("kernel") - - /* map results back from gpu */ - void *ptr = clEnqueueMapBuffer(kEnv.mpkCmdQueue, dstBuffer, CL_TRUE, CL_MAP_READ, 0, dstSize, 0, NULL, NULL, &clStatus); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer dstBuffer"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, NULL, NULL); + if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) { + // magic numbers from leptonica + rwt = 0.3; + gwt = 0.5; + bwt = 0.2; + } + // normalize + float sum = rwt + gwt + bwt; + rwt /= sum; + gwt /= sum; + bwt /= sum; + + // source pix + int w, h; + pixGetDimensions(srcPix, &w, &h, NULL); + // printf("Image is %i x %i\n", w, h); + unsigned int *srcData = pixGetData(srcPix); + int srcWPL = pixGetWpl(srcPix); + int srcSize = srcWPL * h * sizeof(unsigned int); + + // destination pix + if ((dstPix = pixCreate(w, h, 8)) == NULL) return NULL; + pixCopyResolution(dstPix, srcPix); + unsigned int *dstData = pixGetData(dstPix); + int dstWPL = pixGetWpl(dstPix); + int dstWords = dstWPL * h; + int dstSize = dstWords * sizeof(unsigned int); + // printf("dstSize = %i\n", dstSize); + PERF_COUNT_SUB("pix setup") + + // opencl objects + cl_int clStatus; + KernelEnv kEnv; + SetKernelEnv(&kEnv); + + // source buffer + cl_mem srcBuffer = + clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + srcSize, (void *)srcData, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer srcBuffer"); + + // destination buffer + cl_mem dstBuffer = + clCreateBuffer(kEnv.mpkContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + dstSize, (void *)dstData, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer dstBuffer"); + + // setup work group size parameters + int block_size = 256; + int 
numWorkGroups = ((h * w + block_size - 1) / block_size); + int numThreads = block_size * numWorkGroups; + size_t local_work_size[] = {static_cast(block_size)}; + size_t global_work_size[] = {static_cast(numThreads)}; + // printf("Enqueueing %i threads for %i output pixels\n", numThreads, w*h); + + /* compile kernel */ + kEnv.mpkKernel = + clCreateKernel(kEnv.mpkProgram, "kernel_RGBToGray", &clStatus); + CHECK_OPENCL(clStatus, "clCreateKernel kernel_RGBToGray"); + + /* set kernel arguments */ + clStatus = + clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&srcBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg srcBuffer"); + clStatus = + clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), (void *)&dstBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg dstBuffer"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(int), (void *)&srcWPL); + CHECK_OPENCL(clStatus, "clSetKernelArg srcWPL"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(int), (void *)&dstWPL); + CHECK_OPENCL(clStatus, "clSetKernelArg dstWPL"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(int), (void *)&h); + CHECK_OPENCL(clStatus, "clSetKernelArg height"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 5, sizeof(int), (void *)&w); + CHECK_OPENCL(clStatus, "clSetKernelArg width"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 6, sizeof(float), (void *)&rwt); + CHECK_OPENCL(clStatus, "clSetKernelArg rwt"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 7, sizeof(float), (void *)&gwt); + CHECK_OPENCL(clStatus, "clSetKernelArg gwt"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 8, sizeof(float), (void *)&bwt); + CHECK_OPENCL(clStatus, "clSetKernelArg bwt"); + + /* launch kernel & wait */ + PERF_COUNT_SUB("before") + clStatus = + clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, NULL, + global_work_size, local_work_size, 0, NULL, NULL); + CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_RGBToGray"); + clFinish(kEnv.mpkCmdQueue); + PERF_COUNT_SUB("kernel") + + /* map results back 
from gpu */ + void *ptr = + clEnqueueMapBuffer(kEnv.mpkCmdQueue, dstBuffer, CL_TRUE, CL_MAP_READ, 0, + dstSize, 0, NULL, NULL, &clStatus); + CHECK_OPENCL(clStatus, "clEnqueueMapBuffer dstBuffer"); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, NULL, NULL); #if 0 // validate: compute on cpu @@ -3529,13 +3372,12 @@ PERF_COUNT_SUB("kernel") //printf("\n"); } #endif - // release opencl objects - clReleaseMemObject(srcBuffer); - clReleaseMemObject(dstBuffer); - + // release opencl objects + clReleaseMemObject(srcBuffer); + clReleaseMemObject(dstBuffer); -PERF_COUNT_END - // success - return dstPix; + PERF_COUNT_END + // success + return dstPix; } #endif diff --git a/opencl/openclwrapper.h b/opencl/openclwrapper.h index f7e9ad7891..cfd612aa98 100644 --- a/opencl/openclwrapper.h +++ b/opencl/openclwrapper.h @@ -19,7 +19,8 @@ // including CL/cl.h doesn't occur until USE_OPENCL defined below // platform preprocessor commands -#if defined( WIN32 ) || defined( __WIN32__ ) || defined( _WIN32 ) || defined( __CYGWIN__ ) || defined( __MINGW32__ ) +#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \ + defined(__CYGWIN__) || defined(__MINGW32__) #define ON_WINDOWS 1 #define ON_LINUX 0 #define ON_APPLE 0 @@ -89,21 +90,23 @@ time_sub_start = time_funct_start; \ time_sub_end = time_funct_start; -#define PERF_COUNT_END \ - QueryPerformanceCounter(&time_funct_end); \ - elapsed_time_sec = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); \ - printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); +#define PERF_COUNT_END \ + QueryPerformanceCounter(&time_funct_end); \ + elapsed_time_sec = (time_funct_end.QuadPart - time_funct_start.QuadPart) / \ + (double)(freq.QuadPart); \ + printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); #else #define PERF_COUNT_START(FUNCT_NAME) #define PERF_COUNT_END #endif #if PERF_COUNT_VERBOSE >= 3 -#define PERF_COUNT_SUB(SUB) \ - QueryPerformanceCounter(&time_sub_end); \ - 
elapsed_time_sec = (time_sub_end.QuadPart-time_sub_start.QuadPart)/(double)(freq.QuadPart); \ - printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ - time_sub_start = time_sub_end; +#define PERF_COUNT_SUB(SUB) \ + QueryPerformanceCounter(&time_sub_end); \ + elapsed_time_sec = (time_sub_end.QuadPart - time_sub_start.QuadPart) / \ + (double)(freq.QuadPart); \ + printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ + time_sub_start = time_sub_end; #else #define PERF_COUNT_SUB(SUB) #endif @@ -121,21 +124,25 @@ time_sub_start = time_funct_start; \ time_sub_end = time_funct_start; -#define PERF_COUNT_END \ - clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); \ - elapsed_time_sec = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; \ - printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); +#define PERF_COUNT_END \ + clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \ + elapsed_time_sec = \ + (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + \ + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; \ + printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); #else #define PERF_COUNT_START(FUNCT_NAME) #define PERF_COUNT_END #endif #if PERF_COUNT_VERBOSE >= 3 -#define PERF_COUNT_SUB(SUB) \ - clock_gettime( CLOCK_MONOTONIC, &time_sub_end ); \ - elapsed_time_sec = (time_sub_end.tv_sec - time_sub_start.tv_sec)*1.0 + (time_sub_end.tv_nsec - time_sub_start.tv_nsec)/1000000000.0; \ - printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ - time_sub_start = time_sub_end; +#define PERF_COUNT_SUB(SUB) \ + clock_gettime(CLOCK_MONOTONIC, &time_sub_end); \ + elapsed_time_sec = \ + (time_sub_end.tv_sec - time_sub_start.tv_sec) * 1.0 + \ + (time_sub_end.tv_nsec - time_sub_start.tv_nsec) / 1000000000.0; \ + printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ + time_sub_start = time_sub_end; #else #define 
PERF_COUNT_SUB(SUB) #endif @@ -262,12 +269,12 @@ class OpenclDevice // OpenCL implementation of Morphology (Hollow = Closed - Open) static PIX* pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize, bool reqDataCopy); - static void pixGetLinesCL(PIX *pixd, PIX *pixs, - PIX** pix_vline, PIX** pix_hline, - PIX** pixClosed, bool getpixClosed, - l_int32 close_hsize, l_int32 close_vsize, - l_int32 open_hsize, l_int32 open_vsize, - l_int32 line_hsize, l_int32 line_vsize); + static void pixGetLinesCL(PIX *pixd, PIX *pixs, PIX **pix_vline, + PIX **pix_hline, PIX **pixClosed, + bool getpixClosed, l_int32 close_hsize, + l_int32 close_vsize, l_int32 open_hsize, + l_int32 open_vsize, l_int32 line_hsize, + l_int32 line_vsize); //int InitOpenclAttr( OpenCLEnv * env ); //int ReleaseKernel( KernelEnv * env ); @@ -288,34 +295,24 @@ class OpenclDevice static void FreeOpenclDll(); #endif - inline static int AddKernelConfig( int kCount, const char *kName ); /* for binarization */ - static int HistogramRectOCL( - const unsigned char *imagedata, - int bytes_per_pixel, - int bytes_per_line, - int left, - int top, - int width, - int height, - int kHistogramSize, - int *histogramAllChannels); - - static int ThresholdRectToPixOCL( - const unsigned char* imagedata, - int bytes_per_pixel, - int bytes_per_line, - const int* thresholds, - const int* hi_values, - Pix** pix, - int rect_height, - int rect_width, - int rect_top, - int rect_left); - - static Pix * pixConvertRGBToGrayOCL( Pix *pix, float weightRed = 0.3, float weightGreen = 0.5, float weightBlue = 0.2 ); + static int HistogramRectOCL(const unsigned char *imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height, + int kHistogramSize, int *histogramAllChannels); + + static int ThresholdRectToPixOCL(const unsigned char *imagedata, + int bytes_per_pixel, int bytes_per_line, + const int *thresholds, + const int *hi_values, Pix **pix, + int 
rect_height, int rect_width, + int rect_top, int rect_left); + + static Pix *pixConvertRGBToGrayOCL(Pix *pix, float weightRed = 0.3, + float weightGreen = 0.5, + float weightBlue = 0.2); static ds_device getDeviceSelection(); static ds_device selectedDevice; diff --git a/tessdata/configs/box.train.stderr b/tessdata/configs/box.train.stderr index 6fc51fdd5e..d44ff2b2c7 100644 --- a/tessdata/configs/box.train.stderr +++ b/tessdata/configs/box.train.stderr @@ -1,7 +1,7 @@ -file_type .bl -#tessedit_use_nn F -textord_fast_pitch_test T -tessedit_single_match 0 +file_type .bl +#tessedit_use_nn F +textord_fast_pitch_test T +tessedit_single_match 0 tessedit_zero_rejection T tessedit_minimal_rejection F tessedit_write_rep_codes F diff --git a/textord/blkocc.h b/textord/blkocc.h index 89462dc86f..8305c36cdb 100644 --- a/textord/blkocc.h +++ b/textord/blkocc.h @@ -72,12 +72,12 @@ ELISTIZEH (REGION_OCC) Adapted from the following procedure so that it can be used in the bands class in an include file... -BOOL8 range_in_band[ +BOOL8 range_in_band[ range within band? -inT16 band_max, -inT16 band_min, -inT16 range_max, -inT16 range_min] +inT16 band_max, +inT16 band_min, +inT16 range_max, +inT16 range_min] { if ( (range_min >= band_min) && (range_max < band_max) ) return TRUE; @@ -91,12 +91,12 @@ inT16 range_min] Adapted from the following procedure so that it can be used in the bands class in an include file... -BOOL8 range_overlaps_band[ +BOOL8 range_overlaps_band[ range crosses band? 
-inT16 band_max, -inT16 band_min, -inT16 range_max, -inT16 range_min] +inT16 band_max, +inT16 band_min, +inT16 range_max, +inT16 range_min] { if ( (range_max >= band_min) && (range_min < band_max) ) return TRUE; diff --git a/textord/drawedg.h b/textord/drawedg.h index 6bf062d4ee..ef5ed5e202 100644 --- a/textord/drawedg.h +++ b/textord/drawedg.h @@ -1,8 +1,9 @@ /********************************************************************** * File: drawedg.h (Formerly drawedge.h) - * Description: Collection of functions to draw things to do with edge detection. - * Author: Ray Smith - * Created: Thu Jun 06 13:29:20 BST 1991 + * Description: Collection of functions to draw things to do with edge + *detection. + * Author: Ray Smith + * Created: Thu Jun 06 13:29:20 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/textord/fpchop.cpp b/textord/fpchop.cpp index 4c18338b8f..be2768cce9 100644 --- a/textord/fpchop.cpp +++ b/textord/fpchop.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: fpchop.cpp (Formerly fp_chop.c) * Description: Code to chop fixed pitch text into character cells. - * Author: Ray Smith - * Created: Thu Sep 16 11:14:15 BST 1993 + * Author: Ray Smith + * Created: Thu Sep 16 11:14:15 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -730,7 +730,6 @@ C_OUTLINE *join_chopped_fragments( //join pieces return NULL; } - /********************************************************************** * join_segments * diff --git a/textord/makerow.cpp b/textord/makerow.cpp index 1df4855b40..c8170e5d16 100644 --- a/textord/makerow.cpp +++ b/textord/makerow.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: makerow.cpp (Formerly makerows.c) * Description: Code to arrange blobs into rows of text. 
- * Author: Ray Smith - * Created: Mon Sep 21 14:34:48 BST 1992 + * Author: Ray Smith + * Created: Mon Sep 21 14:34:48 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -671,7 +671,7 @@ BOOL8 find_best_dropout_row( //find neighbours TO_ROW_IT *row_it, //current position BOOL8 testing_on //correct orientation ) { - inT32 next_index; //of neighbouring row + inT32 next_index; // of neighbouring row inT32 row_offset; //from current row inT32 abs_dist; //absolute distance inT8 row_inc; //increment to row_index @@ -1786,7 +1786,7 @@ static int CountOverlaps(const TBOX& box, int min_height, BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); - TBOX blob_box = blob->bounding_box(); + const TBOX &blob_box = blob->bounding_box(); if (blob_box.height() >= min_height && box.major_overlap(blob_box)) { ++overlaps; } diff --git a/textord/oldbasel.cpp b/textord/oldbasel.cpp index c73fe8d57b..99e55fdbb6 100644 --- a/textord/oldbasel.cpp +++ b/textord/oldbasel.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: oldbasel.cpp (Formerly oldbl.c) * Description: A re-implementation of the old baseline algorithm. - * Author: Ray Smith - * Created: Wed Oct 6 09:41:48 BST 1993 + * Author: Ray Smith + * Created: Wed Oct 6 09:41:48 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -122,7 +122,7 @@ void Textord::correlate_lines(TO_BLOCK *block, float gradient) { TO_ROW **rows; //array of ptrs int rowcount; /*no of rows to do */ int rowindex; /*no of row */ - //iterator + // iterator TO_ROW_IT row_it = block->get_rows (); rowcount = row_it.length (); @@ -1018,61 +1018,6 @@ int *partcount /*no of partitions */ return bestpart; } - -///*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking -//partitions and gives all the rest partid 0*/ -// -//merge_partitions(partids,partcount,blobcount,bestpart) -//register char *partids; /*partition numbers*/ -//int partcount; /*no of partitions*/ -//int blobcount; /*no of blobs*/ -//int bestpart; /*best partition*/ -//{ -// int blobindex; /*no along text line*/ -// int runlength; /*run of same partition*/ -// int bestrun; /*biggest runlength*/ -// -// bestrun=0; /*no runs yet*/ -// runlength=1; -// for (blobindex=1;blobindexbestrun) -// bestrun=runlength; /*find biggest run*/ -// runlength=1; /*new run*/ -// } -// else -// { runlength++; -// } -// } -// if (runlength>bestrun) -// bestrun=runlength; -// -// for (blobindex=0;blobindex=blobcount -// || partids[blobindex]!=partids[blobindex+1]) -// /*loner*/ -// && (bestrun>2 || partids[blobindex]!=bestpart)) -// { partids[blobindex]=partcount; /*discard loner*/ -// } -// else if (blobindex+1=blobcount -// || partids[blobindex]!=partids[blobindex+2]) -// && (bestrun>3 || partids[blobindex]!=bestpart)) -// { partids[blobindex]=partcount; /*discard both*/ -// partids[blobindex+1]=partcount; -// } -// } -// } -// for (blobindex=0;blobindex= MINASCRISE && partsizes[partition] > poscount) { @@ -1459,8 +1402,8 @@ int blobcount, /*blobs in blobcoords */ QSPLINE * baseline, /*established */ float jumplimit /*min ascender height */ ) { - int blobindex; /*current blob */ - /*height statistics */ + int blobindex; /*current blob */ + /*height statistics */ STATS heightstat (0, 
MAXHEIGHT); int height; /*height of blob */ int xcentre; /*centre of blob */ diff --git a/textord/pitsync1.h b/textord/pitsync1.h index c2fb9bec65..5374b003dd 100644 --- a/textord/pitsync1.h +++ b/textord/pitsync1.h @@ -1,8 +1,8 @@ /********************************************************************** * File: pitsync1.h (Formerly pitsync.h) * Description: Code to find the optimum fixed pitch segmentation of some blobs. - * Author: Ray Smith - * Created: Thu Nov 19 11:48:05 GMT 1992 + * Author: Ray Smith + * Created: Thu Nov 19 11:48:05 GMT 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -46,7 +46,7 @@ class FPSEGPT:public ELIST_LINK FPSEGPT_LIST *prev_list); //previous segment FPSEGPT(FPCUTPT *cutpt); //build from new type - inT32 position() { //access func + inT32 position() { // access func return xpos; } double cost_function() { diff --git a/textord/tabvector.cpp b/textord/tabvector.cpp index c8d508f0df..fcc64a7721 100644 --- a/textord/tabvector.cpp +++ b/textord/tabvector.cpp @@ -523,12 +523,12 @@ const char* kAlignmentNames[] = { // Print basic information about this tab vector. void TabVector::Print(const char* prefix) { - tprintf("%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d," - " partners=%d\n", - prefix, kAlignmentNames[alignment_], - startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(), - mean_width_, percent_score_, sort_key_, - boxes_.length(), partners_.length()); + tprintf( + "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d," + " partners=%d\n", + prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(), + endpt_.x(), endpt_.y(), mean_width_, percent_score_, sort_key_, + boxes_.length(), partners_.length()); } // Print basic information about this tab vector and every box in it. 
diff --git a/textord/textlineprojection.cpp b/textord/textlineprojection.cpp index 6018e5fdaa..2651a19b33 100644 --- a/textord/textlineprojection.cpp +++ b/textord/textlineprojection.cpp @@ -760,7 +760,7 @@ void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const { pt->y = ClipToRange(pt->y, 0, pixGetHeight(pix_) - 1); } #ifdef _MSC_VER -#pragma optimize( "", on ) +#pragma optimize("", on) #endif // _MSC_VER // Transform tesseract image coordinates to coordinates used in the projection. diff --git a/textord/textord.cpp b/textord/textord.cpp index 1f7e8a8869..94ef49c783 100644 --- a/textord/textord.cpp +++ b/textord/textord.cpp @@ -33,7 +33,8 @@ namespace tesseract { Textord::Textord(CCStruct* ccstruct) - : ccstruct_(ccstruct), use_cjk_fp_model_(false), + : ccstruct_(ccstruct), + use_cjk_fp_model_(false), // makerow.cpp /////////////////////////////////////////// BOOL_MEMBER(textord_single_height_mode, false, "Script has no xheight, so use a single mode", @@ -46,24 +47,20 @@ Textord::Textord(CCStruct* ccstruct) "old_to_method.", ccstruct_->params()), BOOL_MEMBER(tosp_only_use_prop_rows, true, - "Block stats to use fixed pitch rows?", - ccstruct_->params()), + "Block stats to use fixed pitch rows?", ccstruct_->params()), BOOL_MEMBER(tosp_force_wordbreak_on_punct, false, "Force word breaks on punct to break long lines in non-space " "delimited langs", ccstruct_->params()), - BOOL_MEMBER(tosp_use_pre_chopping, false, - "Space stats use prechopping?", + BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?", ccstruct_->params()), BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", ccstruct_->params()), - BOOL_MEMBER(tosp_block_use_cert_spaces, true, - "Only stat OBVIOUS spaces", + BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params()), BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params()), - BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, - "Only 
stat OBVIOUS spaces", + BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces", ccstruct_->params()), BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", ccstruct_->params()), @@ -78,30 +75,24 @@ Textord::Textord(CCStruct* ccstruct) "Don't restrict kn->sp fuzzy limit to tables", ccstruct_->params()), BOOL_MEMBER(tosp_stats_use_xht_gaps, true, - "Use within xht gap for wd breaks", - ccstruct_->params()), + "Use within xht gap for wd breaks", ccstruct_->params()), BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", ccstruct_->params()), BOOL_MEMBER(tosp_only_use_xht_gaps, false, - "Only use within xht gap for wd breaks", - ccstruct_->params()), + "Only use within xht gap for wd breaks", ccstruct_->params()), BOOL_MEMBER(tosp_rule_9_test_punct, false, - "Don't chng kn to space next to punct", - ccstruct_->params()), + "Don't chng kn to space next to punct", ccstruct_->params()), BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", ccstruct_->params()), BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", ccstruct_->params()), BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", ccstruct_->params()), - INT_MEMBER(tosp_debug_level, 0, "Debug data", - ccstruct_->params()), + INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()), INT_MEMBER(tosp_enough_space_samples_for_median, 3, - "or should we use mean", - ccstruct_->params()), + "or should we use mean", ccstruct_->params()), INT_MEMBER(tosp_redo_kern_limit, 10, - "No.samples reqd to reestimate for row", - ccstruct_->params()), + "No.samples reqd to reestimate for row", ccstruct_->params()), INT_MEMBER(tosp_few_samples, 40, "No.gaps reqd with 1 large gap to treat as a table", ccstruct_->params()), @@ -114,30 +105,24 @@ Textord::Textord(CCStruct* ccstruct) "Factor for defining space threshold in terms of space and " "kern sizes", ccstruct_->params()), - double_MEMBER(tosp_threshold_bias1, 0, - "how far between kern 
and space?", + double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?", ccstruct_->params()), - double_MEMBER(tosp_threshold_bias2, 0, - "how far between kern and space?", + double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?", ccstruct_->params()), double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", ccstruct_->params()), double_MEMBER(tosp_narrow_aspect_ratio, 0.48, - "narrow if w/h less than this", - ccstruct_->params()), + "narrow if w/h less than this", ccstruct_->params()), double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", ccstruct_->params()), double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", ccstruct_->params()), double_MEMBER(tosp_fuzzy_space_factor, 0.6, - "Fract of xheight for fuzz sp", - ccstruct_->params()), + "Fract of xheight for fuzz sp", ccstruct_->params()), double_MEMBER(tosp_fuzzy_space_factor1, 0.5, - "Fract of xheight for fuzz sp", - ccstruct_->params()), + "Fract of xheight for fuzz sp", ccstruct_->params()), double_MEMBER(tosp_fuzzy_space_factor2, 0.72, - "Fract of xheight for fuzz sp", - ccstruct_->params()), + "Fract of xheight for fuzz sp", ccstruct_->params()), double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", ccstruct_->params()), double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", @@ -156,14 +141,11 @@ Textord::Textord(CCStruct* ccstruct) "Fract of kerns reqd for isolated row stats", ccstruct_->params()), double_MEMBER(tosp_table_kn_sp_ratio, 2.25, - "Min difference of kn & sp in table", - ccstruct_->params()), + "Min difference of kn & sp in table", ccstruct_->params()), double_MEMBER(tosp_table_xht_sp_ratio, 0.33, - "Expect spaces bigger than this", - ccstruct_->params()), + "Expect spaces bigger than this", ccstruct_->params()), double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, - "Fuzzy if less than this", - ccstruct_->params()), + "Fuzzy if less than this", ccstruct_->params()), 
double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", ccstruct_->params()), double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", @@ -172,20 +154,16 @@ Textord::Textord(CCStruct* ccstruct) "Don't trust spaces less than this time kn", ccstruct_->params()), double_MEMBER(tosp_init_guess_kn_mult, 2.2, - "Thresh guess - mult kn by this", - ccstruct_->params()), + "Thresh guess - mult kn by this", ccstruct_->params()), double_MEMBER(tosp_init_guess_xht_mult, 0.28, - "Thresh guess - mult xht by this", - ccstruct_->params()), + "Thresh guess - mult xht by this", ccstruct_->params()), double_MEMBER(tosp_max_sane_kn_thresh, 5.0, - "Multiplier on kn to limit thresh", - ccstruct_->params()), + "Multiplier on kn to limit thresh", ccstruct_->params()), double_MEMBER(tosp_flip_caution, 0.0, "Don't autoflip kn to sp when large separation", ccstruct_->params()), double_MEMBER(tosp_large_kerning, 0.19, - "Limit use of xht gap with large kns", - ccstruct_->params()), + "Limit use of xht gap with large kns", ccstruct_->params()), double_MEMBER(tosp_dont_fool_with_small_kerns, -1, "Limit use of xht gap with odd small kns", ccstruct_->params()), @@ -193,11 +171,9 @@ Textord::Textord(CCStruct* ccstruct) "Don't reduce box if the top left is non blank", ccstruct_->params()), double_MEMBER(tosp_silly_kn_sp_gap, 0.2, - "Don't let sp minus kn get too small", - ccstruct_->params()), + "Don't let sp minus kn get too small", ccstruct_->params()), double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, - "How wide fuzzies need context", - ccstruct_->params()), + "How wide fuzzies need context", ccstruct_->params()), // tordmain.cpp /////////////////////////////////////////// BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", ccstruct_->params()), @@ -206,34 +182,27 @@ Textord::Textord(CCStruct* ccstruct) BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", ccstruct_->params()), INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", - 
ccstruct_->params()), + ccstruct_->params()), INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", - ccstruct_->params()), + ccstruct_->params()), double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs", ccstruct_->params()), double_MEMBER(textord_noise_area_ratio, 0.7, - "Fraction of bounding box for noise", - ccstruct_->params()), + "Fraction of bounding box for noise", ccstruct_->params()), double_MEMBER(textord_blob_size_smallile, 20, - "Percentile for small blobs", - ccstruct_->params()), + "Percentile for small blobs", ccstruct_->params()), double_MEMBER(textord_initialx_ile, 0.75, - "Ile of sizes for xheight guess", - ccstruct_->params()), + "Ile of sizes for xheight guess", ccstruct_->params()), double_MEMBER(textord_initialasc_ile, 0.90, - "Ile of sizes for xheight guess", - ccstruct_->params()), - INT_MEMBER(textord_noise_sizefraction, 10, - "Fraction of size for maxima", + "Ile of sizes for xheight guess", ccstruct_->params()), + INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima", ccstruct_->params()), double_MEMBER(textord_noise_sizelimit, 0.5, - "Fraction of x for big t count", - ccstruct_->params()), + "Fraction of x for big t count", ccstruct_->params()), INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", ccstruct_->params()), double_MEMBER(textord_noise_normratio, 2.0, - "Dot to norm ratio for deletion", - ccstruct_->params()), + "Dot to norm ratio for deletion", ccstruct_->params()), BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", ccstruct_->params()), BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", @@ -242,24 +211,20 @@ Textord::Textord(CCStruct* ccstruct) "xh fract height error for norm blobs", ccstruct_->params()), double_MEMBER(textord_noise_sxfract, 0.4, - "xh fract width error for norm blobs", - ccstruct_->params()), - double_MEMBER(textord_noise_hfract, 1.0/64, + "xh fract width error for norm blobs", ccstruct_->params()), + 
double_MEMBER(textord_noise_hfract, 1.0 / 64, "Height fraction to discard outlines as speckle noise", ccstruct_->params()), INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", ccstruct_->params()), double_MEMBER(textord_noise_rowratio, 6.0, - "Dot to norm ratio for deletion", - ccstruct_->params()), + "Dot to norm ratio for deletion", ccstruct_->params()), BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", ccstruct_->params()), double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", ccstruct_->params()), double_MEMBER(textord_blshift_xfraction, 9.99, - "Min size of baseline shift", - ccstruct_->params()) { -} + "Min size of baseline shift", ccstruct_->params()) {} Textord::~Textord() { } @@ -324,10 +289,9 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew, BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks); baseline_detector.ComputeStraightBaselines(use_box_bottoms); - baseline_detector.ComputeBaselineSplinesAndXheights(page_tr_, true, - textord_heavy_nr, - textord_show_final_rows, - this); + baseline_detector.ComputeBaselineSplinesAndXheights( + page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, + textord_show_final_rows, this); // Now make the words in the lines. if (PSM_WORD_FIND_ENABLED(pageseg_mode)) { // SINGLE_LINE uses the old word maker on the single line. diff --git a/textord/topitch.cpp b/textord/topitch.cpp index ae9999f7db..3dda815777 100644 --- a/textord/topitch.cpp +++ b/textord/topitch.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: topitch.cpp (Formerly to_pitch.c) * Description: Code to determine fixed pitchness and the pitch if fixed. - * Author: Ray Smith - * Created: Tue Aug 24 16:57:29 BST 1993 + * Author: Ray Smith + * Created: Tue Aug 24 16:57:29 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -1084,7 +1084,7 @@ BOOL8 count_pitch_stats( //find lines return FALSE; prev_valid = FALSE; prev_centre = 0; - prev_right = 0; //stop compiler warning + prev_right = 0; // stop compiler warning joined_box = blob_it.data ()->bounding_box (); do { blob_it.forward (); diff --git a/textord/tovars.cpp b/textord/tovars.cpp index 71114358ac..6b1b833248 100644 --- a/textord/tovars.cpp +++ b/textord/tovars.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: tovars.cpp (Formerly to_vars.c) * Description: Variables used by textord. - * Author: Ray Smith - * Created: Tue Aug 24 16:55:02 BST 1993 + * Author: Ray Smith + * Created: Tue Aug 24 16:55:02 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -49,8 +49,8 @@ EXTERN double_VAR (textord_words_default_minspace, 0.6, EXTERN double_VAR (textord_words_min_minspace, 0.3, "Fraction of xheight"); EXTERN double_VAR (textord_words_default_nonspace, 0.2, "Fraction of xheight"); -EXTERN double_VAR (textord_words_initial_lower, 0.25, -"Max initial cluster size"); +EXTERN double_VAR(textord_words_initial_lower, 0.25, + "Max initial cluster size"); EXTERN double_VAR (textord_words_initial_upper, 0.15, "Min initial cluster spacing"); EXTERN double_VAR (textord_words_minlarge, 0.75, @@ -67,7 +67,7 @@ EXTERN double_VAR (textord_pitch_rowsimilarity, 0.08, "Fraction of xheight for sameness"); EXTERN BOOL_VAR (textord_pitch_scalebigwords, FALSE, "Scale scores on big words"); -EXTERN double_VAR (words_initial_lower, 0.5, "Max initial cluster size"); +EXTERN double_VAR(words_initial_lower, 0.5, "Max initial cluster size"); EXTERN double_VAR (words_initial_upper, 0.15, "Min initial cluster spacing"); EXTERN double_VAR (words_default_prop_nonspace, 0.25, "Fraction of xheight"); EXTERN double_VAR (words_default_fixed_space, 0.75, "Fraction of xheight"); diff 
--git a/textord/tovars.h b/textord/tovars.h index 99edae1d2c..46315bb96b 100644 --- a/textord/tovars.h +++ b/textord/tovars.h @@ -1,8 +1,8 @@ /********************************************************************** * File: tovars.h (Formerly to_vars.h) * Description: Variables used by textord. - * Author: Ray Smith - * Created: Tue Aug 24 16:55:02 BST 1993 + * Author: Ray Smith + * Created: Tue Aug 24 16:55:02 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -51,8 +51,8 @@ extern double_VAR_H (textord_words_default_minspace, 0.6, extern double_VAR_H (textord_words_min_minspace, 0.3, "Fraction of xheight"); extern double_VAR_H (textord_words_default_nonspace, 0.2, "Fraction of xheight"); -extern double_VAR_H (textord_words_initial_lower, 0.25, -"Max initial cluster size"); +extern double_VAR_H(textord_words_initial_lower, 0.25, + "Max initial cluster size"); extern double_VAR_H (textord_words_initial_upper, 0.15, "Min initial cluster spacing"); extern double_VAR_H (textord_words_minlarge, 0.75, @@ -69,7 +69,7 @@ extern double_VAR_H (textord_pitch_rowsimilarity, 0.08, "Fraction of xheight for sameness"); extern BOOL_VAR_H (textord_pitch_scalebigwords, FALSE, "Scale scores on big words"); -extern double_VAR_H (words_initial_lower, 0.5, "Max initial cluster size"); +extern double_VAR_H(words_initial_lower, 0.5, "Max initial cluster size"); extern double_VAR_H (words_initial_upper, 0.15, "Min initial cluster spacing"); extern double_VAR_H (words_default_prop_nonspace, 0.25, diff --git a/training/classifier_tester.cpp b/training/classifier_tester.cpp index 48f3781ebb..ed7e50cd2f 100644 --- a/training/classifier_tester.cpp +++ b/training/classifier_tester.cpp @@ -48,9 +48,9 @@ enum ClassifierName { const char* names[] = {"pruner", "full", #ifndef NO_CUBE_BUILD - "cube", "cubetess", + "cube", "cubetess", #endif // NO_CUBE_BUILD - NULL }; + NULL}; static tesseract::ShapeClassifier* 
InitializeClassifier( const char* classifer_name, const UNICHARSET& unicharset, @@ -80,7 +80,7 @@ static tesseract::ShapeClassifier* InitializeClassifier( tesseract::Classify* classify = NULL; if ( #ifndef NO_CUBE_BUILD - classifier == CN_CUBE || classifier == CN_CUBETESS || + classifier == CN_CUBE || classifier == CN_CUBETESS || #endif // NO_CUBE_BUILD classifier == CN_PRUNER || classifier == CN_FULL) { #ifndef NO_CUBE_BUILD diff --git a/training/cntraining.cpp b/training/cntraining.cpp index ab19ddb93d..916a758576 100644 --- a/training/cntraining.cpp +++ b/training/cntraining.cpp @@ -20,7 +20,6 @@ ** limitations under the License. ******************************************************************************/ - /*---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------*/ @@ -53,10 +52,8 @@ int main ( Private Function Prototypes ----------------------------------------------------------------------------*/ -void WriteNormProtos ( - const char *Directory, - LIST LabeledProtoList, - CLUSTERER *Clusterer); +void WriteNormProtos (const char *Directory, LIST LabeledProtoList, + CLUSTERER *Clusterer); /* PARAMDESC *ConvertToPARAMDESC( @@ -81,7 +78,6 @@ CLUSTERCONFIG CNConfig = elliptical, 0.025, 0.05, 0.8, 1e-3, 0 }; - /*---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------*/ @@ -134,8 +130,7 @@ CLUSTERCONFIG CNConfig = * @note Exceptions: none * @note History: Fri Aug 18 08:56:17 1989, DSJ, Created. */ -int main(int argc, char* argv[]) -{ +int main(int argc, char *argv[]) { // Set the global Config parameters before parsing the command line. Config = CNConfig; @@ -221,10 +216,8 @@ int main(int argc, char* argv[]) * @note Exceptions: none * @note History: Fri Aug 18 16:17:06 1989, DSJ, Created. 
*/ -void WriteNormProtos ( - const char *Directory, - LIST LabeledProtoList, - CLUSTERER *Clusterer) +void WriteNormProtos(const char *Directory, LIST LabeledProtoList, + CLUSTERER *Clusterer) { FILE *File; STRING Filename; @@ -240,8 +233,8 @@ void WriteNormProtos ( Filename += "normproto"; printf ("\nWriting %s ...", Filename.string()); File = Efopen (Filename.string(), "wb"); - fprintf(File,"%0d\n",Clusterer->SampleSize); - WriteParamDesc(File,Clusterer->SampleSize,Clusterer->ParamDesc); + fprintf(File, "%0d\n", Clusterer->SampleSize); + WriteParamDesc(File, Clusterer->SampleSize,Clusterer->ParamDesc); iterate(LabeledProtoList) { LabeledProto = (LABELEDLIST) first_node (LabeledProtoList); diff --git a/training/commandlineflags.cpp b/training/commandlineflags.cpp index d8cb371828..56dcb84221 100644 --- a/training/commandlineflags.cpp +++ b/training/commandlineflags.cpp @@ -115,36 +115,36 @@ void PrintCommandLineFlags() { if (!strncmp(GlobalParams()->int_params[i]->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) { printf(" --%s %s (type:int default:%d)\n", - GlobalParams()->int_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->int_params[i]->info_str(), - inT32(*(GlobalParams()->int_params[i]))); + GlobalParams()->int_params[i]->name_str() + kFlagNamePrefixLen, + GlobalParams()->int_params[i]->info_str(), + inT32(*(GlobalParams()->int_params[i]))); } } for (int i = 0; i < GlobalParams()->double_params.size(); ++i) { if (!strncmp(GlobalParams()->double_params[i]->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) { printf(" --%s %s (type:double default:%g)\n", - GlobalParams()->double_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->double_params[i]->info_str(), - static_cast(*(GlobalParams()->double_params[i]))); + GlobalParams()->double_params[i]->name_str() + kFlagNamePrefixLen, + GlobalParams()->double_params[i]->info_str(), + static_cast(*(GlobalParams()->double_params[i]))); } } for (int i = 0; i < GlobalParams()->bool_params.size(); 
++i) { if (!strncmp(GlobalParams()->bool_params[i]->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) { printf(" --%s %s (type:bool default:%s)\n", - GlobalParams()->bool_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->bool_params[i]->info_str(), - (BOOL8(*(GlobalParams()->bool_params[i])) ? "true" : "false")); + GlobalParams()->bool_params[i]->name_str() + kFlagNamePrefixLen, + GlobalParams()->bool_params[i]->info_str(), + (BOOL8(*(GlobalParams()->bool_params[i])) ? "true" : "false")); } } for (int i = 0; i < GlobalParams()->string_params.size(); ++i) { if (!strncmp(GlobalParams()->string_params[i]->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) { printf(" --%s %s (type:string default:%s)\n", - GlobalParams()->string_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->string_params[i]->info_str(), - GlobalParams()->string_params[i]->string()); + GlobalParams()->string_params[i]->name_str() + kFlagNamePrefixLen, + GlobalParams()->string_params[i]->info_str(), + GlobalParams()->string_params[i]->string()); } } } diff --git a/training/commontraining.cpp b/training/commontraining.cpp index df2521f513..b7243e6f3f 100644 --- a/training/commontraining.cpp +++ b/training/commontraining.cpp @@ -39,8 +39,8 @@ #include using tesseract::CCUtil; -using tesseract::FontInfo; using tesseract::IntFeatureSpace; +using tesseract::FontInfo; using tesseract::ParamUtils; using tesseract::ShapeTable; @@ -312,9 +312,7 @@ const char *GetNextFilename(int argc, const char* const * argv) { return argv[tessoptind++]; else return NULL; -} /* GetNextFilename */ - - +} /* GetNextFilename */ /*---------------------------------------------------------------------------*/ /** @@ -328,11 +326,8 @@ const char *GetNextFilename(int argc, const char* const * argv) { * @note Exceptions: none * @note History: Fri Aug 18 15:57:41 1989, DSJ, Created. 
*/ -LABELEDLIST FindList ( - LIST List, - char *Label) -{ - LABELEDLIST LabeledList; +LABELEDLIST FindList(LIST List, char* Label) { + LABELEDLIST LabeledList; iterate (List) { @@ -342,7 +337,7 @@ LABELEDLIST FindList ( } return (NULL); -} /* FindList */ +} /* FindList */ /*---------------------------------------------------------------------------*/ /** @@ -354,10 +349,8 @@ LABELEDLIST FindList ( * @note Exceptions: none * @note History: Fri Aug 18 16:08:46 1989, DSJ, Created. */ -LABELEDLIST NewLabeledList ( - const char *Label) -{ - LABELEDLIST LabeledList; +LABELEDLIST NewLabeledList(const char* Label) { + LABELEDLIST LabeledList; LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE)); LabeledList->Label = (char*)Emalloc (strlen (Label)+1); @@ -367,7 +360,7 @@ LABELEDLIST NewLabeledList ( LabeledList->font_sample_count = 0; return (LabeledList); -} /* NewLabeledList */ +} /* NewLabeledList */ /*---------------------------------------------------------------------------*/ // TODO(rays) This is now used only by cntraining. Convert cntraining to use @@ -386,7 +379,7 @@ LABELEDLIST NewLabeledList ( * @return none * @note Globals: none * @note Exceptions: none - * @note History: + * @note History: * - Fri Aug 18 13:11:39 1989, DSJ, Created. * - Tue May 17 1998 simplifications to structure, illiminated * font, and feature specification levels of structure. 
@@ -460,11 +453,10 @@ void FreeTrainingSamples(LIST CharList) { FEATURE_SET FeatureSet; LIST FeatureList; - - iterate(CharList) { /* iterate through all of the fonts */ + iterate(CharList) { /* iterate through all of the fonts */ char_sample = (LABELEDLIST) first_node(CharList); FeatureList = char_sample->List; - iterate(FeatureList) { /* iterate through all of the classes */ + iterate(FeatureList) { /* iterate through all of the classes */ FeatureSet = (FEATURE_SET) first_node(FeatureList); FreeFeatureSet(FeatureSet); } @@ -535,12 +527,12 @@ CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, if ( Sample != NULL ) free( Sample ); return( Clusterer ); -} /* SetUpForClustering */ +} /* SetUpForClustering */ /*------------------------------------------------------------------------*/ void MergeInsignificantProtos(LIST ProtoList, const char* label, - CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { - PROTOTYPE *Prototype; + CLUSTERER* Clusterer, CLUSTERCONFIG* Config) { + PROTOTYPE* Prototype; bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0; LIST pProtoList = ProtoList; @@ -600,7 +592,7 @@ void MergeInsignificantProtos(LIST ProtoList, const char* label, Prototype->Significant = true; } } -} /* MergeInsignificantProtos */ +} /* MergeInsignificantProtos */ /*-----------------------------------------------------------------------------*/ void CleanUpUnusedData( @@ -695,14 +687,11 @@ LIST RemoveInsignificantProtos( } FreeProtoList(&ProtoList); return (NewProtoList); -} /* RemoveInsignificantProtos */ +} /* RemoveInsignificantProtos */ /*----------------------------------------------------------------------------*/ -MERGE_CLASS FindClass ( - LIST List, - const char *Label) -{ - MERGE_CLASS MergeClass; +MERGE_CLASS FindClass(LIST List, const char* Label) { + MERGE_CLASS MergeClass; iterate (List) { @@ -712,13 +701,11 @@ MERGE_CLASS FindClass ( } return (NULL); -} /* FindClass */ +} /* FindClass */ 
/*---------------------------------------------------------------------------*/ -MERGE_CLASS NewLabeledClass ( - const char *Label) -{ - MERGE_CLASS MergeClass; +MERGE_CLASS NewLabeledClass(const char* Label) { + MERGE_CLASS MergeClass; MergeClass = new MERGE_CLASS_NODE; MergeClass->Label = (char*)Emalloc (strlen (Label)+1); @@ -726,7 +713,7 @@ MERGE_CLASS NewLabeledClass ( MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS); return (MergeClass); -} /* NewLabeledClass */ +} /* NewLabeledClass */ /*-----------------------------------------------------------------------------*/ /** @@ -738,38 +725,36 @@ MERGE_CLASS NewLabeledClass ( * @note Exceptions: none * @note History: Fri Aug 18 17:44:27 1989, DSJ, Created. */ -void FreeLabeledClassList ( - LIST ClassList) -{ - MERGE_CLASS MergeClass; +void FreeLabeledClassList(LIST ClassList) { + MERGE_CLASS MergeClass; - iterate (ClassList) /* iterate through all of the fonts */ + iterate(ClassList) /* iterate through all of the fonts */ { MergeClass = (MERGE_CLASS) first_node (ClassList); free (MergeClass->Label); FreeClass(MergeClass->Class); delete MergeClass; } - destroy (ClassList); + destroy(ClassList); -} /* FreeLabeledClassList */ +} /* FreeLabeledClassList */ /* SetUpForFloat2Int */ CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset, LIST LabeledClassList) { - MERGE_CLASS MergeClass; - CLASS_TYPE Class; - int NumProtos; - int NumConfigs; - int NumWords; - int i, j; - float Values[3]; - PROTO NewProto; - PROTO OldProto; - BIT_VECTOR NewConfig; - BIT_VECTOR OldConfig; - - // printf("Float2Int ...\n"); + MERGE_CLASS MergeClass; + CLASS_TYPE Class; + int NumProtos; + int NumConfigs; + int NumWords; + int i, j; + float Values[3]; + PROTO NewProto; + PROTO OldProto; + BIT_VECTOR NewConfig; + BIT_VECTOR OldConfig; + + // printf("Float2Int ...\n"); CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()]; iterate(LabeledClassList) @@ -835,20 +820,19 @@ void Normalize ( } // Normalize 
/*-------------------------------------------------------------------------*/ -void FreeNormProtoList ( - LIST CharList) +void FreeNormProtoList(LIST CharList) { - LABELEDLIST char_sample; + LABELEDLIST char_sample; - iterate (CharList) /* iterate through all of the fonts */ + iterate(CharList) /* iterate through all of the fonts */ { char_sample = (LABELEDLIST) first_node (CharList); FreeLabeledList (char_sample); } - destroy (CharList); + destroy(CharList); -} // FreeNormProtoList +} // FreeNormProtoList /*---------------------------------------------------------------------------*/ void AddToNormProtosList( @@ -869,19 +853,16 @@ void AddToNormProtosList( } /*---------------------------------------------------------------------------*/ -int NumberOfProtos( - LIST ProtoList, - BOOL8 CountSigProtos, - BOOL8 CountInsigProtos) -{ +int NumberOfProtos(LIST ProtoList, BOOL8 CountSigProtos, + BOOL8 CountInsigProtos) { int N = 0; - PROTOTYPE *Proto; + PROTOTYPE* Proto; iterate(ProtoList) { Proto = (PROTOTYPE *) first_node ( ProtoList ); - if (( Proto->Significant && CountSigProtos ) || - ( ! 
Proto->Significant && CountInsigProtos ) ) + if ((Proto->Significant && CountSigProtos) || + (!Proto->Significant && CountInsigProtos)) N++; } return(N); diff --git a/training/stringrenderer.h b/training/stringrenderer.h index 942b7fddce..f0ba0c0b00 100644 --- a/training/stringrenderer.h +++ b/training/stringrenderer.h @@ -90,7 +90,7 @@ class StringRenderer { void set_underline_style(const PangoUnderline style) { underline_style_ = style; } - void set_features(const char *features) { + void set_features(const char* features) { free(features_); features_ = strdup(features); } @@ -130,12 +130,8 @@ class StringRenderer { const PangoFontInfo& font() const { return font_; } - int h_margin() const { - return h_margin_; - } - int v_margin() const { - return v_margin_; - } + int h_margin() const { return h_margin_; } + int v_margin() const { return v_margin_; } // Get the boxchars of all clusters rendered thus far (or since the last call // to ClearBoxes()). @@ -148,6 +144,9 @@ class StringRenderer { void RotatePageBoxes(float rotation); // Delete all boxes. void ClearBoxes(); + // Returns the boxes in a boxfile string. + string GetBoxesStr(); + // Writes the boxes to a boxfile. void WriteAllBoxes(const string& filename); // Removes space-delimited words from the string that are not renderable by // the current font and returns the count of such words. @@ -189,7 +188,7 @@ class StringRenderer { double underline_start_prob_; double underline_continuation_prob_; PangoUnderline underline_style_; - char *features_; + char* features_; // Text filtering options bool drop_uncovered_chars_; bool strip_unrenderable_words_; @@ -211,7 +210,7 @@ class StringRenderer { Boxa* page_boxes_; // Objects cached for subsequent calls to RenderAllFontsToImage() - hash_map char_map_; // Time-saving char histogram. + TessHashMap char_map_; // Time-saving char histogram. int total_chars_; // Number in the string to be rendered. int font_index_; // Index of next font to use in font list. 
int last_offset_; // Offset returned from last successful rendering diff --git a/training/text2image.cpp b/training/text2image.cpp index 406669dc10..946f6facda 100644 --- a/training/text2image.cpp +++ b/training/text2image.cpp @@ -251,6 +251,8 @@ void ExtractFontProperties(const string &utf8_text, // the input consists of the separated characters. NOTE(ranjith): As per // behdad@ this is not currently controllable at the level of the Pango // API. + // The most frequent of all is a single character "word" made by the CJK + // segmenter. // Safeguard against these cases here by just skipping the bigram. if (IsWhitespaceBox(boxes[b+1])) { continue; @@ -445,7 +447,7 @@ int main(int argc, char** argv) { string pango_name; if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) { tprintf("Could not find font named %s.\n", FLAGS_font.c_str()); - if (!pango_name.empty()) { + if (!pango_name.empty()) { tprintf("Pango suggested font %s.\n", pango_name.c_str()); } tprintf("Please correct --font arg.\n"); @@ -523,7 +525,7 @@ int main(int argc, char** argv) { if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() && !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) { tprintf("Failed to load unicharset from file %s\n", - FLAGS_unicharset_file.c_str()); + FLAGS_unicharset_file.c_str()); exit(1); } @@ -604,7 +606,8 @@ int main(int argc, char** argv) { rotation = -1 * page_rotation[page_num]; } if (FLAGS_degrade_image) { - pix = DegradeImage(pix, FLAGS_exposure, &randomizer, FLAGS_rotate_image ? &rotation : NULL); + pix = DegradeImage(pix, FLAGS_exposure, &randomizer, + FLAGS_rotate_image ? 
&rotation : NULL); } render.RotatePageBoxes(rotation); diff --git a/viewer/scrollview.cpp b/viewer/scrollview.cpp index ac059d5469..f10b789ea8 100644 --- a/viewer/scrollview.cpp +++ b/viewer/scrollview.cpp @@ -37,7 +37,7 @@ #include "scrollview.h" #ifdef _MSC_VER -#pragma warning(disable:4786) // Don't give stupid warnings for stl +#pragma warning(disable:4786) // Don't give irrelevant warnings for stl #pragma warning(disable:4018) // signed/unsigned warnings #pragma warning(disable:4530) // exception warnings #endif diff --git a/viewer/svutil.h b/viewer/svutil.h index ccfce917fe..667c052083 100644 --- a/viewer/svutil.h +++ b/viewer/svutil.h @@ -26,6 +26,7 @@ #ifdef _WIN32 #ifndef __GNUC__ +#include "platform.h" #include #if defined(_MSC_VER) && _MSC_VER < 1900 #define snprintf _snprintf @@ -102,6 +103,17 @@ class SVMutex { #endif }; +// Auto-unlocking object that locks a mutex on construction and unlocks it +// on destruction. +class SVAutoLock { + public: + explicit SVAutoLock(SVMutex* mutex) : mutex_(mutex) { mutex->Lock(); } + ~SVAutoLock() { mutex_->Unlock(); } + + private: + SVMutex* mutex_; +}; + /// The SVNetwork class takes care of the remote connection for ScrollView /// This means setting up and maintaining a remote connection, sending and /// receiving messages and closing the connection. diff --git a/wordrec/lm_state.h b/wordrec/lm_state.h index 623bbb5e7f..6229e9b350 100644 --- a/wordrec/lm_state.h +++ b/wordrec/lm_state.h @@ -48,8 +48,8 @@ typedef unsigned char LanguageModelFlagsType; /// Each ViterbiStateEntry contains information from various components of the /// language model: dawgs in which the path is found, character ngram model /// probability of the path, script/chartype/font consistency info, state for -/// language-specific heuristics (e.g. hyphenated and compound words, lower/upper -/// case preferences, etc). +/// language-specific heuristics (e.g. hyphenated and compound words, +/// lower/upper case preferences, etc). 
/// /// Each ViterbiStateEntry also contains the parent pointer, so that the path /// that it represents (WERD_CHOICE) can be constructed by following these @@ -165,13 +165,13 @@ struct ViterbiStateEntry : public ELIST_LINK { /// Various information about the characters on the path represented /// by this ViterbiStateEntry. - float ratings_sum; //< sum of ratings of character on the path - float min_certainty; //< minimum certainty on the path - int adapted; //< number of BLOB_CHOICES from adapted templates - int length; //< number of characters on the path + float ratings_sum; //< sum of ratings of character on the path + float min_certainty; //< minimum certainty on the path + int adapted; //< number of BLOB_CHOICES from adapted templates + int length; //< number of characters on the path float outline_length; //< length of the outline so far LMConsistencyInfo consistency_info; //< path consistency info - AssociateStats associate_stats; //< character widths/gaps/seams + AssociateStats associate_stats; //< character widths/gaps/seams /// Flags for marking the entry as a top choice path with /// the smallest rating or lower/upper case letters). diff --git a/wordrec/measure.h b/wordrec/measure.h index 9c73906853..894938e55a 100644 --- a/wordrec/measure.h +++ b/wordrec/measure.h @@ -60,10 +60,9 @@ typedef struct * Add one more sample to a measurement. **********************************************************************/ -#define ADD_SAMPLE(m,s) \ -(m.sum_of_samples += (float) (s), \ - m.sum_of_squares += (float) (s) * (float) (s), \ - ++m.num_samples) +#define ADD_SAMPLE(m, s) \ + (m.sum_of_samples += (float)(s), \ + m.sum_of_squares += (float)(s) * (float)(s), ++m.num_samples) /********************************************************************** * mean @@ -71,10 +70,8 @@ typedef struct * Return the mean value of the measurement. **********************************************************************/ -#define MEAN(m) \ -((m).num_samples ? 
\ - ((float) ((m).sum_of_samples / (m).num_samples)) : \ - 0) +#define MEAN(m) \ + ((m).num_samples ? ((float)((m).sum_of_samples / (m).num_samples)) : 0) /********************************************************************** * new_measurement @@ -83,10 +80,8 @@ typedef struct * samples. **********************************************************************/ -#define new_measurement(m) \ -((m).num_samples = 0, \ - (m).sum_of_samples = 0, \ - (m).sum_of_squares = 0) +#define new_measurement(m) \ + ((m).num_samples = 0, (m).sum_of_samples = 0, (m).sum_of_squares = 0) /********************************************************************** * number_of_samples @@ -112,13 +107,12 @@ typedef struct * Return the variance of the measurement. **********************************************************************/ -#define VARIANCE(m) \ -(((m).num_samples > 1) ? \ - ((float) \ - (((m).num_samples * (m).sum_of_squares - \ - (m).sum_of_samples * (m).sum_of_samples) / \ - (((m).num_samples - 1) * (m).num_samples))) : \ - 0) +#define VARIANCE(m) \ + (((m).num_samples > 1) \ + ? ((float)(((m).num_samples * (m).sum_of_squares - \ + (m).sum_of_samples * (m).sum_of_samples) / \ + (((m).num_samples - 1) * (m).num_samples))) \ + : 0) /********************************************************************** * print_summary @@ -126,10 +120,8 @@ typedef struct * Summarize a MEASUREMENT record. 
**********************************************************************/ -#define print_summary(string,measure) \ -cprintf ("\t%-20s \tn = %d, \tm = %4.2f, \ts = %4.2f\n ", \ - string, \ - number_of_samples (measure), \ - MEAN (measure), \ - standard_deviation (measure)) +#define print_summary(string, measure) \ + cprintf("\t%-20s \tn = %d, \tm = %4.2f, \ts = %4.2f\n ", string, \ + number_of_samples(measure), MEAN(measure), \ + standard_deviation(measure)) #endif diff --git a/wordrec/pieces.cpp b/wordrec/pieces.cpp index 04e340396e..f7b406d5bc 100644 --- a/wordrec/pieces.cpp +++ b/wordrec/pieces.cpp @@ -267,7 +267,6 @@ void Wordrec::merge_and_put_fragment_lists(inT16 row, inT16 column, delete [] choice_lists_it; } - /********************************************************************** * get_fragment_lists * From 90651e111fa44e2a5f2c9ac5632b414203dfcb79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 25 Nov 2016 15:14:46 +0100 Subject: [PATCH 022/132] backport style changes from 4.00 for better identification of fixes and new code --- api/baseapi.cpp | 117 +- api/baseapi.h | 14 +- api/pdfrenderer.cpp | 13 +- api/renderer.cpp | 29 +- api/renderer.h | 20 +- api/tesseractmain.cpp | 162 ++- ccmain/control.cpp | 17 +- ccmain/docqual.cpp | 16 +- ccmain/fixspace.cpp | 5 +- ccmain/ltrresultiterator.cpp | 11 +- ccmain/ltrresultiterator.h | 3 +- ccmain/output.cpp | 28 +- ccmain/pagesegmain.cpp | 7 +- ccmain/paramsd.cpp | 10 +- ccmain/reject.cpp | 4 +- ccmain/tessedit.cpp | 2 +- ccmain/tesseractclass.cpp | 5 +- ccmain/tesseractclass.h | 3 +- ccstruct/blobbox.cpp | 4 +- ccstruct/boxread.cpp | 3 +- ccstruct/boxword.h | 4 +- ccstruct/coutln.cpp | 66 +- ccstruct/matrix.h | 4 +- ccstruct/mod128.cpp | 4 +- ccstruct/mod128.h | 4 +- ccstruct/otsuthr.cpp | 26 +- ccstruct/pageres.h | 6 +- ccstruct/pdblock.h | 150 +-- ccstruct/polyaprx.cpp | 2 +- ccstruct/polyblk.cpp | 2 +- ccstruct/quspline.cpp | 4 +- ccstruct/ratngs.h | 3 +- ccstruct/rect.cpp | 12 +- 
ccstruct/rect.h | 10 +- ccstruct/rejctmap.h | 61 +- ccstruct/statistc.cpp | 5 +- ccutil/clst.cpp | 29 +- ccutil/clst.h | 288 ++--- ccutil/elst.cpp | 26 +- ccutil/elst.h | 62 +- ccutil/elst2.cpp | 26 +- ccutil/elst2.h | 321 +++-- ccutil/errcode.h | 9 +- ccutil/lsterr.h | 2 +- ccutil/mainblk.cpp | 2 +- ccutil/ocrclass.h | 39 +- ccutil/params.cpp | 3 +- ccutil/strngs.cpp | 4 +- ccutil/tessdatamanager.h | 70 +- ccutil/unicharset.h | 3 +- classify/classify.cpp | 4 +- classify/cluster.cpp | 477 ++++--- classify/clusttool.cpp | 43 +- classify/clusttool.h | 22 +- classify/cutoffs.cpp | 10 +- classify/featdefs.cpp | 14 +- classify/fpoint.cpp | 11 +- classify/intmatcher.cpp | 26 +- classify/intmatcher.h | 10 +- classify/intproto.cpp | 18 +- classify/kdtree.cpp | 28 +- classify/mf.cpp | 12 +- classify/mfdefs.cpp | 16 +- classify/mfoutline.cpp | 5 +- classify/mfx.cpp | 4 +- classify/mfx.h | 10 +- classify/normfeat.cpp | 11 +- classify/normmatch.cpp | 18 +- classify/ocrfeatures.cpp | 33 +- classify/outfeat.cpp | 14 +- classify/picofeat.cpp | 12 +- cutil/bitvec.h | 42 +- cutil/danerror.cpp | 10 +- cutil/efio.cpp | 12 +- cutil/emalloc.cpp | 2 +- dict/context.cpp | 38 +- dict/dict.cpp | 64 +- dict/stopper.cpp | 11 +- dict/stopper.h | 10 +- opencl/openclwrapper.cpp | 1940 +++++++++++++---------------- opencl/openclwrapper.h | 97 +- tessdata/configs/box.train.stderr | 8 +- textord/blkocc.h | 20 +- textord/drawedg.h | 7 +- textord/fpchop.cpp | 5 +- textord/imagefind.cpp | 2 +- textord/makerow.cpp | 8 +- textord/oldbasel.cpp | 79 +- textord/pithsync.h | 6 +- textord/pitsync1.h | 6 +- textord/tabvector.cpp | 12 +- textord/textlineprojection.cpp | 2 +- textord/textord.cpp | 122 +- textord/topitch.cpp | 6 +- textord/tospace.cpp | 39 +- textord/tovars.cpp | 10 +- textord/tovars.h | 10 +- training/classifier_tester.cpp | 6 +- training/cntraining.cpp | 21 +- training/commandlineflags.cpp | 24 +- training/commontraining.cpp | 119 +- training/stringrenderer.h | 17 +- training/text2image.cpp | 
9 +- viewer/scrollview.cpp | 2 +- viewer/svutil.h | 12 + wordrec/associate.h | 4 +- wordrec/lm_state.h | 14 +- wordrec/measure.h | 42 +- wordrec/pieces.cpp | 1 - 109 files changed, 2450 insertions(+), 2907 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index a0689978e5..55c7892c6e 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -809,9 +809,7 @@ int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks, * has not been subjected to a call of Init, SetImage, Recognize, Clear, End * DetectOS, or anything else that changes the internal PAGE_RES. */ -PageIterator* TessBaseAPI::AnalyseLayout() { - return AnalyseLayout(false); -} +PageIterator* TessBaseAPI::AnalyseLayout() { return AnalyseLayout(false); } PageIterator* TessBaseAPI::AnalyseLayout(bool merge_similar_words) { if (FindLines() == 0) { @@ -948,9 +946,7 @@ void TessBaseAPI::SetInputImage(Pix *pix) { input_image_ = pixCopy(NULL, pix); } -Pix* TessBaseAPI::GetInputImage() { - return input_image_; -} +Pix* TessBaseAPI::GetInputImage() { return input_image_; } const char * TessBaseAPI::GetInputName() { if (input_file_) @@ -1379,8 +1375,9 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it, hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); } -static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int num2) { - const unsigned long BUFSIZE = 64; +static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, + int num2) { + const size_t BUFSIZE = 64; char id_buffer[BUFSIZE]; if (num2 >= 0) { snprintf(id_buffer, BUFSIZE - 1, "%s_%d_%d", base.c_str(), num1, num2); @@ -1393,8 +1390,7 @@ static void AddIdTohOCR(STRING* hocr_str, const std::string base, int num1, int *hocr_str += "'"; } -static void AddBoxTohOCR(const ResultIterator *it, - PageIteratorLevel level, +static void AddBoxTohOCR(const ResultIterator* it, PageIteratorLevel level, STRING* hocr_str) { int left, top, right, bottom; it->BoundingBox(level, &left, &top, &right, &bottom); @@ -1410,7 
+1406,7 @@ static void AddBoxTohOCR(const ResultIterator *it, // add custom height measures float row_height, descenders, ascenders; // row attributes it->RowAttributes(&row_height, &descenders, &ascenders); - // TODO: Do we want to limit these to a single decimal place? + // TODO(rays): Do we want to limit these to a single decimal place? hocr_str->add_str_double("; x_size ", row_height); hocr_str->add_str_double("; x_descenders ", descenders * -1); hocr_str->add_str_double("; x_ascenders ", ascenders); @@ -1418,9 +1414,8 @@ static void AddBoxTohOCR(const ResultIterator *it, *hocr_str += "\">"; } -static void AddBoxToTSV(const PageIterator *it, - PageIteratorLevel level, - STRING* hocr_str) { +static void AddBoxToTSV(const PageIterator* it, PageIteratorLevel level, + STRING* hocr_str) { int left, top, right, bottom; it->BoundingBox(level, &left, &top, &right, &bottom); hocr_str->add_str_int("\t", left); @@ -1429,8 +1424,6 @@ static void AddBoxToTSV(const PageIterator *it, hocr_str->add_str_int("\t", bottom - top); } - - /** * Make a HTML-formatted string with hOCR markup from the internal * data structures. @@ -1440,7 +1433,7 @@ static void AddBoxToTSV(const PageIterator *it, * STL removed from original patch submission and refactored by rays. */ char* TessBaseAPI::GetHOCRText(int page_number) { - return GetHOCRText(NULL,page_number); + return GetHOCRText(NULL, page_number); } /** @@ -1452,13 +1445,12 @@ char* TessBaseAPI::GetHOCRText(int page_number) { * STL removed from original patch submission and refactored by rays. */ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { - if (tesseract_ == NULL || - (page_res_ == NULL && Recognize(monitor) < 0)) + if (tesseract_ == NULL || (page_res_ == NULL && Recognize(monitor) < 0)) return NULL; int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; int page_id = page_number + 1; // hOCR uses 1-based page numbers. 
- bool para_is_ltr = true; // Default direction is LTR + bool para_is_ltr = true; // Default direction is LTR const char* paragraph_lang = NULL; bool font_info = false; GetBoolVariable("hocr_font_info", &font_info); @@ -1470,13 +1462,13 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { #ifdef _WIN32 // convert input name from ANSI encoding to utf-8 - int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, - NULL, 0); + int str16_len = + MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, NULL, 0); wchar_t *uni16_str = new WCHAR[str16_len]; str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, uni16_str, str16_len); - int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, - 0, NULL, NULL); + int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, 0, + NULL, NULL); char *utf8_str = new char[utf8_len]; WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, NULL, NULL); @@ -1509,7 +1501,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { // Open any new block/paragraph/textline. if (res_it->IsAtBeginningOf(RIL_BLOCK)) { - para_is_ltr = true; // reset to default direction + para_is_ltr = true; // reset to default direction hocr_str += "
WordRecognitionLanguage(); if (paragraph_lang) { - hocr_str += " lang='"; - hocr_str += paragraph_lang; - hocr_str += "'"; + hocr_str += " lang='"; + hocr_str += paragraph_lang; + hocr_str += "'"; } AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); } @@ -1600,7 +1592,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { if (last_word_in_para) { hocr_str += "\n

\n"; pcnt++; - para_is_ltr = true; // back to default direction + para_is_ltr = true; // back to default direction } if (last_word_in_block) { hocr_str += "
\n"; @@ -1620,8 +1612,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { * page_number is 0-based but will appear in the output as 1-based. */ char* TessBaseAPI::GetTSVText(int page_number) { - if (tesseract_ == NULL || - (page_res_ == NULL && Recognize(NULL) < 0)) + if (tesseract_ == NULL || (page_res_ == NULL && Recognize(NULL) < 0)) return NULL; int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; @@ -1629,9 +1620,10 @@ char* TessBaseAPI::GetTSVText(int page_number) { STRING tsv_str(""); - int page_num = page_id, block_num = 0, par_num = 0, line_num = 0, word_num = 0; + int page_num = page_id, block_num = 0, par_num = 0, line_num = 0, + word_num = 0; - tsv_str.add_str_int("1\t", page_num); // level 1 - page + tsv_str.add_str_int("1\t", page_num); // level 1 - page tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); @@ -1642,7 +1634,7 @@ char* TessBaseAPI::GetTSVText(int page_number) { tsv_str.add_str_int("\t", rect_height_); tsv_str += "\t-1\t\n"; - ResultIterator *res_it = GetIterator(); + ResultIterator* res_it = GetIterator(); while (!res_it->Empty(RIL_BLOCK)) { if (res_it->Empty(RIL_WORD)) { res_it->Next(RIL_WORD); @@ -1652,46 +1644,46 @@ char* TessBaseAPI::GetTSVText(int page_number) { // Add rows for any new block/paragraph/textline. 
if (res_it->IsAtBeginningOf(RIL_BLOCK)) { block_num++, par_num = 0, line_num = 0, word_num = 0; - tsv_str.add_str_int("2\t", page_num); // level 2 - block + tsv_str.add_str_int("2\t", page_num); // level 2 - block tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", word_num); AddBoxToTSV(res_it, RIL_BLOCK, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for block + tsv_str += "\t-1\t\n"; // end of row for block } if (res_it->IsAtBeginningOf(RIL_PARA)) { par_num++, line_num = 0, word_num = 0; - tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph + tsv_str.add_str_int("3\t", page_num); // level 3 - paragraph tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", word_num); AddBoxToTSV(res_it, RIL_PARA, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for para + tsv_str += "\t-1\t\n"; // end of row for para } if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { line_num++, word_num = 0; - tsv_str.add_str_int("4\t", page_num); // level 4 - line + tsv_str.add_str_int("4\t", page_num); // level 4 - line tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); tsv_str.add_str_int("\t", word_num); AddBoxToTSV(res_it, RIL_TEXTLINE, &tsv_str); - tsv_str += "\t-1\t\n"; // end of row for line + tsv_str += "\t-1\t\n"; // end of row for line } // Now, process the word... 
int left, top, right, bottom; bool bold, italic, underlined, monospace, serif, smallcaps; int pointsize, font_id; - const char *font_name; + const char* font_name; res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); - font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, - &monospace, &serif, &smallcaps, - &pointsize, &font_id); + font_name = + res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, + &serif, &smallcaps, &pointsize, &font_id); word_num++; - tsv_str.add_str_int("5\t", page_num); // level 5 - word + tsv_str.add_str_int("5\t", page_num); // level 5 - word tsv_str.add_str_int("\t", block_num); tsv_str.add_str_int("\t", par_num); tsv_str.add_str_int("\t", line_num); @@ -1712,11 +1704,11 @@ char* TessBaseAPI::GetTSVText(int page_number) { tsv_str += res_it->GetUTF8Text(RIL_SYMBOL); res_it->Next(RIL_SYMBOL); } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); - tsv_str += "\n"; // end of row + tsv_str += "\n"; // end of row wcnt++; } - char *ret = new char[tsv_str.length() + 1]; + char* ret = new char[tsv_str.length() + 1]; strcpy(ret, tsv_str.string()); delete res_it; return ret; @@ -1907,17 +1899,17 @@ char* TessBaseAPI::GetUNLVText() { return result; } - /** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * page_number is a 0-based page index that will appear in the osd file. - */ +/** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * page_number is a 0-based page index that will appear in the osd file. 
+ */ char* TessBaseAPI::GetOsdText(int page_number) { OSResults osr; bool osd = DetectOS(&osr); if (!osd) { - return NULL; + return NULL; } int orient_id = osr.best_result.orientation_id; @@ -1931,19 +1923,18 @@ char* TessBaseAPI::GetOsdText(int page_number) { int orient_deg = orient_id * 90; // clockwise rotation needed to make the page upright - int rotate = OrientationIdToValue(orient_id); + int rotate = OrientationIdToValue(orient_id); char* osd_buf = new char[255]; snprintf(osd_buf, 255, - "Page number: %d\n" - "Orientation in degrees: %d\n" - "Rotate: %d\n" - "Orientation confidence: %.2f\n" - "Script: %s\n" - "Script confidence: %.2f\n", - page_number, - orient_deg, rotate, orient_conf, - script_name, script_conf); + "Page number: %d\n" + "Orientation in degrees: %d\n" + "Rotate: %d\n" + "Orientation confidence: %.2f\n" + "Script: %s\n" + "Script confidence: %.2f\n", + page_number, orient_deg, rotate, orient_conf, script_name, + script_conf); return osd_buf; } diff --git a/api/baseapi.h b/api/baseapi.h index 3b0d3f67ce..d872689eec 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -588,8 +588,8 @@ class TESS_API TessBaseAPI { * data structures. * page_number is 0-based but will appear in the output as 1-based. 
* monitor can be used to - * cancel the recognition - * receive progress callbacks + * cancel the recognition + * receive progress callbacks */ char* GetHOCRText(ETEXT_DESC* monitor, int page_number); @@ -750,13 +750,9 @@ class TESS_API TessBaseAPI { */ static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); - Tesseract* tesseract() const { - return tesseract_; - } + Tesseract* tesseract() const { return tesseract_; } - OcrEngineMode oem() const { - return last_oem_requested_; - } + OcrEngineMode oem() const { return last_oem_requested_; } void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } @@ -898,7 +894,7 @@ class TESS_API TessBaseAPI { const char* retry_config, int timeout_millisec, TessResultRenderer* renderer, int tessedit_page_number); - // TIFF supports multipage so gets special consideration + // TIFF supports multipage so gets special consideration. bool ProcessPagesMultipageTiff(const unsigned char *data, size_t size, const char* filename, diff --git a/api/pdfrenderer.cpp b/api/pdfrenderer.cpp index 4708300492..dc90c5a3bf 100644 --- a/api/pdfrenderer.cpp +++ b/api/pdfrenderer.cpp @@ -20,12 +20,12 @@ #include "config_auto.h" #endif +#include "allheaders.h" #include "baseapi.h" -#include "renderer.h" #include "math.h" +#include "renderer.h" #include "strngs.h" #include "tprintf.h" -#include "allheaders.h" #ifdef _MSC_VER #include "mathfix.h" @@ -282,7 +282,7 @@ void AffineMatrix(int writing_direction, } } -// There are some really stupid PDF viewers in the wild, such as +// There are some really awkward PDF viewers in the wild, such as // 'Preview' which ships with the Mac. They do a better job with text // selection and highlighting when given perfectly flat baseline // instead of very slightly tilted. 
We clip small tilts to appease @@ -441,8 +441,8 @@ char* TessPDFRenderer::GetPDFTextObjects(TessBaseAPI* api, int code = unicodes[i]; // Convert to UTF-16BE https://en.wikipedia.org/wiki/UTF-16 if ((code > 0xD7FF && code < 0xE000) || code > 0x10FFFF) { - tprintf("Dropping invalid codepoint %d\n", code); - continue; + tprintf("Dropping invalid codepoint %d\n", code); + continue; } if (code < 0x10000) { snprintf(utf16, sizeof(utf16), "<%04X>", code); @@ -567,7 +567,8 @@ bool TessPDFRenderer::BeginDocumentHandler() { "<<\n" " /Length %lu /Filter /FlateDecode\n" ">>\n" - "stream\n", (unsigned long)len); + "stream\n", + (unsigned long)len); if (n >= sizeof(buf)) { lept_free(comp); return false; diff --git a/api/renderer.cpp b/api/renderer.cpp index 4a88a24608..e683149381 100644 --- a/api/renderer.cpp +++ b/api/renderer.cpp @@ -155,11 +155,11 @@ TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info) bool TessHOcrRenderer::BeginDocumentHandler() { AppendString( - "\n" - "\n" - "\n \n "); + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n" + " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n" + "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" " + "lang=\"en\">\n <head>\n <title>"); AppendString(title()); AppendString( "\n" @@ -198,25 +198,25 @@ bool TessHOcrRenderer::AddImageHandler(TessBaseAPI* api) { /********************************************************************** * TSV Text Renderer interface implementation **********************************************************************/ -TessTsvRenderer::TessTsvRenderer(const char *outputbase) +TessTsvRenderer::TessTsvRenderer(const char* outputbase) : TessResultRenderer(outputbase, "tsv") { - font_info_ = false; + font_info_ = false; } -TessTsvRenderer::TessTsvRenderer(const char *outputbase, bool font_info) +TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info) : 
TessResultRenderer(outputbase, "tsv") { - font_info_ = font_info; + font_info_ = font_info; } bool TessTsvRenderer::BeginDocumentHandler() { // Output TSV column headings - AppendString("level\tpage_num\tblock_num\tpar_num\tline_num\tword_num\tleft\ttop\twidth\theight\tconf\ttext\n"); + AppendString( + "level\tpage_num\tblock_num\tpar_num\tline_num\tword_" + "num\tleft\ttop\twidth\theight\tconf\ttext\n"); return true; } -bool TessTsvRenderer::EndDocumentHandler() { - return true; -} +bool TessTsvRenderer::EndDocumentHandler() { return true; } bool TessTsvRenderer::AddImageHandler(TessBaseAPI* api) { char* tsv = api->GetTSVText(imagenum()); @@ -266,8 +266,7 @@ bool TessBoxTextRenderer::AddImageHandler(TessBaseAPI* api) { * Osd Text Renderer interface implementation **********************************************************************/ TessOsdRenderer::TessOsdRenderer(const char* outputbase) - : TessResultRenderer(outputbase, "osd") { -} + : TessResultRenderer(outputbase, "osd") {} bool TessOsdRenderer::AddImageHandler(TessBaseAPI* api) { char* osd = api->GetOsdText(imagenum()); diff --git a/api/renderer.h b/api/renderer.h index 6b47813f7b..ac64e4b452 100644 --- a/api/renderer.h +++ b/api/renderer.h @@ -126,7 +126,7 @@ class TESS_API TessResultRenderer { private: const char* file_extension_; // standard extension for generated output - const char* title_; // title of document being renderered + const char* title_; // title of document being renderered int imagenum_; // index of last image added FILE* fout_; // output file pointer @@ -153,13 +153,13 @@ class TESS_API TessHOcrRenderer : public TessResultRenderer { explicit TessHOcrRenderer(const char *outputbase, bool font_info); explicit TessHOcrRenderer(const char *outputbase); -protected: + protected: virtual bool BeginDocumentHandler(); virtual bool AddImageHandler(TessBaseAPI* api); virtual bool EndDocumentHandler(); -private: - bool font_info_; // whether to print font information + private: + bool font_info_; 
// whether to print font information }; /** @@ -167,15 +167,15 @@ class TESS_API TessHOcrRenderer : public TessResultRenderer { */ class TESS_API TessTsvRenderer : public TessResultRenderer { public: - explicit TessTsvRenderer(const char *outputbase, bool font_info); - explicit TessTsvRenderer(const char *outputbase); + explicit TessTsvRenderer(const char* outputbase, bool font_info); + explicit TessTsvRenderer(const char* outputbase); -protected: + protected: virtual bool BeginDocumentHandler(); virtual bool AddImageHandler(TessBaseAPI* api); virtual bool EndDocumentHandler(); -private: + private: bool font_info_; // whether to print font information }; @@ -188,12 +188,12 @@ class TESS_API TessPDFRenderer : public TessResultRenderer { // we load a custom PDF font from this location. TessPDFRenderer(const char *outputbase, const char *datadir); -protected: + protected: virtual bool BeginDocumentHandler(); virtual bool AddImageHandler(TessBaseAPI* api); virtual bool EndDocumentHandler(); -private: + private: // We don't want to have every image in memory at once, // so we store some metadata as we go along producing // PDFs one page at a time. 
At the end that metadata is diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 3fe8dc8c53..71a0a6a8b7 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -40,28 +40,28 @@ static void Win32WarningHandler(const char* module, const char* fmt, va_list ap) { - if (module != NULL) { - fprintf(stderr, "%s: ", module); - } - fprintf(stderr, "Warning, "); - vfprintf(stderr, fmt, ap); - fprintf(stderr, ".\n"); + if (module != NULL) { + fprintf(stderr, "%s: ", module); + } + fprintf(stderr, "Warning, "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, ".\n"); } #endif /* HAVE_TIFFIO_H && _WIN32 */ void PrintVersionInfo() { - char *versionStrP; + char* versionStrP; - printf("tesseract %s\n", tesseract::TessBaseAPI::Version()); + printf("tesseract %s\n", tesseract::TessBaseAPI::Version()); - versionStrP = getLeptonicaVersion(); - printf(" %s\n", versionStrP); - lept_free(versionStrP); + versionStrP = getLeptonicaVersion(); + printf(" %s\n", versionStrP); + lept_free(versionStrP); - versionStrP = getImagelibVersions(); - printf(" %s\n", versionStrP); - lept_free(versionStrP); + versionStrP = getImagelibVersions(); + printf(" %s\n", versionStrP); + lept_free(versionStrP); #ifdef USE_OPENCL cl_platform_id platform; @@ -82,7 +82,7 @@ void PrintVersionInfo() { printf(" Found %d devices.\n", num_devices); for (i = 0; i < num_devices; ++i) { clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0); - printf(" Device %d name: %s.\n", i+1, info); + printf(" Device %d name: %s.\n", i + 1, info); } #endif } @@ -90,7 +90,7 @@ void PrintVersionInfo() { void PrintUsage(const char* program) { printf( "Usage:\n" - " %s --help | --help-psm | --version\n" + " %s --help | --help-psm | --help-oem | --version\n" " %s --list-langs [--tessdata-dir PATH]\n" " %s --print-parameters [options...] [configfile...]\n" " %s imagename|stdin outputbase|stdout [options...] 
[configfile...]\n", @@ -100,28 +100,26 @@ void PrintUsage(const char* program) { void PrintHelpForPSM() { const char* msg = "Page segmentation modes:\n" - " 0 Orientation and script detection (OSD) only.\n" - " 1 Automatic page segmentation with OSD.\n" - " 2 Automatic page segmentation, but no OSD, or OCR.\n" - " 3 Fully automatic page segmentation, but no OSD. (Default)\n" - " 4 Assume a single column of text of variable sizes.\n" - " 5 Assume a single uniform block of vertically aligned text.\n" - " 6 Assume a single uniform block of text.\n" - " 7 Treat the image as a single text line.\n" - " 8 Treat the image as a single word.\n" - " 9 Treat the image as a single word in a circle.\n" - " 10 Treat the image as a single character.\n" - - //TODO: Consider publishing these modes. - #if 0 - " 11 Sparse text. Find as much text as possible in no" - " particular order.\n" - " 12 Sparse text with OSD.\n" - " 13 Raw line. Treat the image as a single text line,\n" - "\t\t\tbypassing hacks that are Tesseract-specific.\n" - #endif - ; - + " 0 Orientation and script detection (OSD) only.\n" + " 1 Automatic page segmentation with OSD.\n" + " 2 Automatic page segmentation, but no OSD, or OCR.\n" + " 3 Fully automatic page segmentation, but no OSD. (Default)\n" + " 4 Assume a single column of text of variable sizes.\n" + " 5 Assume a single uniform block of vertically aligned text.\n" + " 6 Assume a single uniform block of text.\n" + " 7 Treat the image as a single text line.\n" + " 8 Treat the image as a single word.\n" + " 9 Treat the image as a single word in a circle.\n" + " 10 Treat the image as a single character.\n" + //TODO: Consider publishing these modes. + #if 0 + " 11 Sparse text. Find as much text as possible in no" + " particular order.\n" + " 12 Sparse text with OSD.\n" + " 13 Raw line. 
Treat the image as a single text line,\n" + "\t\t\tbypassing hacks that are Tesseract-specific.\n" + #endif + ; printf("%s", msg); } @@ -137,31 +135,30 @@ void PrintHelpMessage(const char* program) { " -c VAR=VALUE Set value for config variables.\n" " Multiple -c arguments are allowed.\n" " -psm NUM Specify page segmentation mode.\n" - "NOTE: These options must occur before any configfile.\n" - ; + "NOTE: These options must occur before any configfile.\n"; printf("\n%s\n", ocr_options); PrintHelpForPSM(); - const char *single_options = + const char* single_options = "Single options:\n" " -h, --help Show this help message.\n" " --help-psm Show page segmentation modes.\n" " -v, --version Show version information.\n" " --list-langs List available languages for tesseract engine.\n" - " --print-parameters Print tesseract parameters to stdout.\n" - ; + " --print-parameters Print tesseract parameters to stdout.\n"; printf("\n%s", single_options); } -void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, char** argv) { +void SetVariablesFromCLArgs(tesseract::TessBaseAPI* api, int argc, + char** argv) { char opt1[256], opt2[255]; for (int i = 0; i < argc; i++) { if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) { strncpy(opt1, argv[i + 1], 255); opt1[255] = '\0'; - char *p = strchr(opt1, '='); + char* p = strchr(opt1, '='); if (!p) { fprintf(stderr, "Missing = in configvar assignment\n"); exit(1); @@ -190,8 +187,8 @@ void PrintLangsList(tesseract::TessBaseAPI* api) { } void PrintBanner() { - tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", - tesseract::TessBaseAPI::Version()); + tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", + tesseract::TessBaseAPI::Version()); } /** @@ -209,31 +206,25 @@ void PrintBanner() { * but that doesn't work. 
*/ void FixPageSegMode(tesseract::TessBaseAPI* api, - tesseract::PageSegMode pagesegmode) { + tesseract::PageSegMode pagesegmode) { if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) - api->SetPageSegMode(pagesegmode); + api->SetPageSegMode(pagesegmode); } // NOTE: arg_i is used here to avoid ugly *i so many times in this function -void ParseArgs(const int argc, char** argv, - const char** lang, - const char** image, - const char** outputbase, - const char** datapath, - bool* list_langs, - bool* print_parameters, - GenericVector* vars_vec, - GenericVector* vars_values, - int* arg_i, - tesseract::PageSegMode* pagesegmode) { +void ParseArgs(const int argc, char** argv, const char** lang, + const char** image, const char** outputbase, + const char** datapath, bool* list_langs, bool* print_parameters, + GenericVector* vars_vec, + GenericVector* vars_values, int* arg_i, + tesseract::PageSegMode* pagesegmode) { if (argc == 1) { PrintHelpMessage(argv[0]); exit(0); } if (argc == 2) { - if ((strcmp(argv[1], "-h") == 0) || - (strcmp(argv[1], "--help") == 0)) { + if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) { PrintHelpMessage(argv[0]); exit(0); } @@ -241,8 +232,7 @@ void ParseArgs(const int argc, char** argv, PrintHelpForPSM(); exit(0); } - if ((strcmp(argv[1], "-v") == 0) || - (strcmp(argv[1], "--version") == 0)) { + if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) { PrintVersionInfo(); exit(0); } @@ -298,10 +288,10 @@ void ParseArgs(const int argc, char** argv, } } -void PreloadRenderers(tesseract::TessBaseAPI* api, - tesseract::PointerVector* renderers, - tesseract::PageSegMode pagesegmode, - const char* outputbase) { +void PreloadRenderers( + tesseract::TessBaseAPI* api, + tesseract::PointerVector* renderers, + tesseract::PageSegMode pagesegmode, const char* outputbase) { if (pagesegmode == tesseract::PSM_OSD_ONLY) { renderers->push_back(new tesseract::TessOsdRenderer(outputbase)); } else { @@ -311,7 +301,7 @@ void 
PreloadRenderers(tesseract::TessBaseAPI* api, bool font_info; api->GetBoolVariable("hocr_font_info", &font_info); renderers->push_back( - new tesseract::TessHOcrRenderer(outputbase, font_info)); + new tesseract::TessHOcrRenderer(outputbase, font_info)); } api->GetBoolVariable("tessedit_create_tsv", &b); @@ -324,8 +314,8 @@ void PreloadRenderers(tesseract::TessBaseAPI* api, api->GetBoolVariable("tessedit_create_pdf", &b); if (b) { - renderers->push_back(new tesseract::TessPDFRenderer(outputbase, - api->GetDatapath())); + renderers->push_back( + new tesseract::TessPDFRenderer(outputbase, api->GetDatapath())); } api->GetBoolVariable("tessedit_write_unlv", &b); @@ -359,8 +349,7 @@ void PreloadRenderers(tesseract::TessBaseAPI* api, * **********************************************************************/ - -int main(int argc, char **argv) { +int main(int argc, char** argv) { const char* lang = "eng"; const char* image = NULL; const char* outputbase = NULL; @@ -380,10 +369,8 @@ int main(int argc, char **argv) { TIFFSetWarningHandler(Win32WarningHandler); #endif /* HAVE_TIFFIO_H && _WIN32 */ - ParseArgs(argc, argv, - &lang, &image, &outputbase, &datapath, - &list_langs, &print_parameters, - &vars_vec, &vars_values, &arg_i, &pagesegmode); + ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs, + &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode); bool banner = false; if (outputbase != NULL && strcmp(outputbase, "-") && @@ -406,8 +393,8 @@ int main(int argc, char **argv) { SetVariablesFromCLArgs(&api, argc, argv); if (list_langs) { - PrintLangsList(&api); - exit(0); + PrintLangsList(&api); + exit(0); } if (print_parameters) { @@ -436,12 +423,13 @@ int main(int argc, char **argv) { tesseract::TextlineOrder order; float deskew_angle; - tesseract::PageIterator* it = api.AnalyseLayout(); + tesseract::PageIterator* it = api.AnalyseLayout(); if (it) { it->Orientation(&orientation, &direction, &order, &deskew_angle); - tprintf("Orientation: 
%d\nWritingDirection: %d\nTextlineOrder: %d\n" \ - "Deskew angle: %.4f\n", - orientation, direction, order, deskew_angle); + tprintf( + "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" + "Deskew angle: %.4f\n", + orientation, direction, order, deskew_angle); } else { ret_val = 1; } @@ -456,14 +444,12 @@ int main(int argc, char **argv) { // ambigs.train, box.train, box.train.stderr, linebox, rebox bool b = false; bool in_training_mode = - (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || - (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || - (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); + (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) || + (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) || + (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b); tesseract::PointerVector renderers; - - if (in_training_mode) { renderers.push_back(NULL); } else { diff --git a/ccmain/control.cpp b/ccmain/control.cpp index 3970c5429e..5953698932 100644 --- a/ccmain/control.cpp +++ b/ccmain/control.cpp @@ -1,8 +1,8 @@ /****************************************************************** * File: control.cpp (Formerly control.c) * Description: Module-independent matcher controller. - * Author: Ray Smith - * Created: Thu Apr 23 11:09:58 BST 1992 + * Author: Ray Smith + * Created: Thu Apr 23 11:09:58 BST 1992 * ReHacked: Tue Sep 22 08:42:49 BST 1992 Phil Cheatle * * (C) Copyright 1992, Hewlett-Packard Ltd. @@ -73,7 +73,6 @@ void Tesseract::recog_pseudo_word(PAGE_RES* page_res, } } - /** * Recognize a single word in interactive mode. 
* @@ -219,16 +218,14 @@ bool Tesseract::RecogAllWordsPassN(int pass_n, ETEXT_DESC* monitor, if (pass_n == 1) { monitor->progress = 70 * w / words->size(); if (monitor->progress_callback != NULL) { - TBOX box = pr_it->word()->word->bounding_box(); - (*monitor->progress_callback)(monitor->progress, - box.left(), box.right(), - box.top(), box.bottom()); + TBOX box = pr_it->word()->word->bounding_box(); + (*monitor->progress_callback)(monitor->progress, box.left(), + box.right(), box.top(), box.bottom()); } } else { monitor->progress = 70 + 30 * w / words->size(); - if (monitor->progress_callback!=NULL) { - (*monitor->progress_callback)(monitor->progress, - 0, 0, 0, 0); + if (monitor->progress_callback != NULL) { + (*monitor->progress_callback)(monitor->progress, 0, 0, 0, 0); } } if (monitor->deadline_exceeded() || diff --git a/ccmain/docqual.cpp b/ccmain/docqual.cpp index c6e7f17e0f..4706fb3b26 100644 --- a/ccmain/docqual.cpp +++ b/ccmain/docqual.cpp @@ -1,8 +1,8 @@ /****************************************************************** * File: docqual.cpp (Formerly docqual.c) * Description: Document Quality Metrics - * Author: Phil Cheatle - * Created: Mon May 9 11:27:28 BST 1994 + * Author: Phil Cheatle + * Created: Mon May 9 11:27:28 BST 1994 * * (C) Copyright 1994, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -98,8 +98,8 @@ void Tesseract::word_char_quality(WERD_RES *word, ROW *row, inT16 *match_count, inT16 *accepted_match_count) { - if (word->bln_boxes == NULL || - word->rebuild_word == NULL || word->rebuild_word->blobs.empty()) { + if (word->bln_boxes == NULL || word->rebuild_word == NULL || + word->rebuild_word->blobs.empty()) { *match_count = 0; *accepted_match_count = 0; return; @@ -132,7 +132,7 @@ inT16 Tesseract::count_outline_errs(char c, inT16 outline_count) { int expected_outline_count; if (STRING (outlines_odd).contains (c)) - return 0; //Don't use this char + return 0; // Don't use this char else if (STRING (outlines_2).contains (c)) expected_outline_count = 2; else @@ -151,17 +151,16 @@ void Tesseract::quality_based_rejection(PAGE_RES_IT &page_res_it, } } - /************************************************************************* * unrej_good_quality_words() * Accept potential rejects in words which pass the following checks: * - Contains a potential reject * - Word looks like a sensible alpha word. 
* - Word segmentation is the same as the original image - * - All characters have the expected number of outlines + * - All characters have the expected number of outlines * NOTE - the rejection counts are recalculated after unrejection * - CAN'T do it in a single pass without a bit of fiddling - * - keep it simple but inefficient + * - keep it simple but inefficient *************************************************************************/ void Tesseract::unrej_good_quality_words( //unreject potential PAGE_RES_IT &page_res_it) { @@ -403,7 +402,6 @@ void Tesseract::doc_and_block_rejection( //reject big chunks } // namespace tesseract - /************************************************************************* * reject_whole_page() * Don't believe any of it - set the reject map to 00..00 in all words diff --git a/ccmain/fixspace.cpp b/ccmain/fixspace.cpp index f58c9610fa..5fbe8c9a4e 100644 --- a/ccmain/fixspace.cpp +++ b/ccmain/fixspace.cpp @@ -3,8 +3,8 @@ * Description: Implements a pass over the page res, exploring the alternative * spacing possibilities, trying to use context to improve the * word spacing -* Author: Phil Cheatle -* Created: Thu Oct 21 11:38:43 BST 1993 +* Author: Phil Cheatle +* Created: Thu Oct 21 11:38:43 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -211,7 +211,6 @@ void Tesseract::match_current_words(WERD_RES_LIST &words, ROW *row, } } - /** * @name eval_word_spacing() * The basic measure is the number of characters in contextually confirmed diff --git a/ccmain/ltrresultiterator.cpp b/ccmain/ltrresultiterator.cpp index d5b8594667..f80e594518 100644 --- a/ccmain/ltrresultiterator.cpp +++ b/ccmain/ltrresultiterator.cpp @@ -145,13 +145,12 @@ float LTRResultIterator::Confidence(PageIteratorLevel level) const { return 0.0f; } -void LTRResultIterator::RowAttributes(float* row_height, - float* descenders, +void LTRResultIterator::RowAttributes(float* row_height, float* descenders, float* ascenders) const { - *row_height = it_->row()->row->x_height() + it_->row()-> row->ascenders() - - it_->row()->row->descenders(); - *descenders = it_->row()->row->descenders(); - *ascenders = it_->row()->row->ascenders(); + *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() - + it_->row()->row->descenders(); + *descenders = it_->row()->row->descenders(); + *ascenders = it_->row()->row->ascenders(); } // Returns the font attributes of the current word. If iterating at a higher diff --git a/ccmain/ltrresultiterator.h b/ccmain/ltrresultiterator.h index 8819c2a0ee..f2605b52d2 100644 --- a/ccmain/ltrresultiterator.h +++ b/ccmain/ltrresultiterator.h @@ -92,8 +92,7 @@ class TESS_API LTRResultIterator : public PageIterator { float Confidence(PageIteratorLevel level) const; // Returns the attributes of the current row. - void RowAttributes(float* row_height, - float* descenders, + void RowAttributes(float* row_height, float* descenders, float* ascenders) const; // ============= Functions that refer to words only ============. 
diff --git a/ccmain/output.cpp b/ccmain/output.cpp index ddfcfc54b6..6fca63e420 100644 --- a/ccmain/output.cpp +++ b/ccmain/output.cpp @@ -1,8 +1,8 @@ /****************************************************************** * File: output.cpp (Formerly output.c) * Description: Output pass - * Author: Phil Cheatle - * Created: Thu Aug 4 10:56:08 BST 1994 + * Author: Phil Cheatle + * Created: Thu Aug 4 10:56:08 BST 1994 * * (C) Copyright 1994, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -78,18 +78,16 @@ void Tesseract::output_pass( //Tess output pass //send to api while (page_res_it.word () != NULL) { check_debug_pt (page_res_it.word (), 120); - if (target_word_box) - { - - TBOX current_word_box=page_res_it.word ()->word->bounding_box(); - FCOORD center_pt((current_word_box.right()+current_word_box.left())/2,(current_word_box.bottom()+current_word_box.top())/2); - if (!target_word_box->contains(center_pt)) - { - page_res_it.forward (); - continue; - } - - } + if (target_word_box) { + TBOX current_word_box = page_res_it.word()->word->bounding_box(); + FCOORD center_pt( + (current_word_box.right() + current_word_box.left()) / 2, + (current_word_box.bottom() + current_word_box.top()) / 2); + if (!target_word_box->contains(center_pt)) { + page_res_it.forward(); + continue; + } + } if (tessedit_write_block_separators && block_of_last_word != page_res_it.block ()) { block_of_last_word = page_res_it.block (); @@ -337,7 +335,7 @@ void Tesseract::set_unlv_suspects(WERD_RES *word_res) { rating_per_ch = word.rating() / word_res->reject_map.length(); if (rating_per_ch >= suspect_rating_per_ch) - return; //Don't touch bad ratings + return; // Don't touch bad ratings if ((word_res->tess_accepted) || (rating_per_ch < suspect_accept_rating)) { /* Unreject any Tess Acceptable word - but NOT tess reject chs*/ diff --git a/ccmain/pagesegmain.cpp b/ccmain/pagesegmain.cpp index 4e3c342070..f4401d64d9 100644 --- a/ccmain/pagesegmain.cpp +++ 
b/ccmain/pagesegmain.cpp @@ -412,9 +412,10 @@ ColumnFinder* Tesseract::SetupPageSegAndDetectOrientation( "Don't rotate.\n", osd_margin); osd_orientation = 0; } else { - tprintf("OSD: Weak margin (%.2f) for %d blob text block, " - "but using orientation anyway: %d\n", - osd_margin, osd_blobs.length(), osd_orientation); + tprintf( + "OSD: Weak margin (%.2f) for %d blob text block, " + "but using orientation anyway: %d\n", + osd_margin, osd_blobs.length(), osd_orientation); } } } diff --git a/ccmain/paramsd.cpp b/ccmain/paramsd.cpp index 7784f85361..e0e60539f6 100644 --- a/ccmain/paramsd.cpp +++ b/ccmain/paramsd.cpp @@ -329,13 +329,19 @@ void ParamsEditor::WriteParams(char *filename, fclose(fp); sprintf (msg_str, "Overwrite file " "%s" "? (Y/N)", filename); int a = sv_window_->ShowYesNoDialog(msg_str); - if (a == 'n') { return; } // don't write + if (a == 'n') { + return; + } // don't write } fp = fopen (filename, "wb"); // can we write to it? if (fp == NULL) { - sv_window_->AddMessage("Can't write to file " "%s" "", filename); + sv_window_->AddMessage( + "Can't write to file " + "%s" + "", + filename); return; } diff --git a/ccmain/reject.cpp b/ccmain/reject.cpp index aacc80dd6e..72f9d873d1 100644 --- a/ccmain/reject.cpp +++ b/ccmain/reject.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: reject.cpp (Formerly reject.c) * Description: Rejection functions used in tessedit - * Author: Phil Cheatle - * Created: Wed Sep 23 16:50:21 BST 1992 + * Author: Phil Cheatle + * Created: Wed Sep 23 16:50:21 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccmain/tessedit.cpp b/ccmain/tessedit.cpp index dd96ba0ebd..8c1fb80837 100644 --- a/ccmain/tessedit.cpp +++ b/ccmain/tessedit.cpp @@ -44,7 +44,7 @@ #include "params.h" #define VARDIR "configs/" /*variables files */ - //config under api + // config under api #define API_CONFIG "configs/api_config" ETEXT_DESC *global_monitor = NULL; // progress monitor diff --git a/ccmain/tesseractclass.cpp b/ccmain/tesseractclass.cpp index 8db50fbd54..f0cc1bfffe 100644 --- a/ccmain/tesseractclass.cpp +++ b/ccmain/tesseractclass.cpp @@ -398,8 +398,8 @@ Tesseract::Tesseract() "Don't suspect dict wds longer than this", this->params()), BOOL_MEMBER(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected", this->params()), - double_MEMBER(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit", - this->params()), + double_MEMBER(suspect_rating_per_ch, 999.9, + "Don't touch bad rating limit", this->params()), double_MEMBER(suspect_accept_rating, -999.9, "Accept good rating limit", this->params()), BOOL_MEMBER(tessedit_minimal_rejection, false, @@ -512,7 +512,6 @@ Tesseract::Tesseract() "Page separator (default is form feed control character)", this->params()), - // The following parameters were deprecated and removed from their // original // locations. 
The parameters are temporarily kept here to give Tesseract diff --git a/ccmain/tesseractclass.h b/ccmain/tesseractclass.h index 91d25bc8ae..5bc0b102c6 100644 --- a/ccmain/tesseractclass.h +++ b/ccmain/tesseractclass.h @@ -1010,8 +1010,7 @@ class Tesseract : public Wordrec { INT_VAR_H(suspect_level, 99, "Suspect marker level"); INT_VAR_H(suspect_space_level, 100, "Min suspect level for rejecting spaces"); - INT_VAR_H(suspect_short_words, 2, - "Don't Suspect dict wds longer than this"); + INT_VAR_H(suspect_short_words, 2, "Don't Suspect dict wds longer than this"); BOOL_VAR_H(suspect_constrain_1Il, false, "UNLV keep 1Il chars rejected"); double_VAR_H(suspect_rating_per_ch, 999.9, "Don't touch bad rating limit"); double_VAR_H(suspect_accept_rating, -999.9, "Accept good rating limit"); diff --git a/ccstruct/blobbox.cpp b/ccstruct/blobbox.cpp index 280096b5d3..47a625af32 100644 --- a/ccstruct/blobbox.cpp +++ b/ccstruct/blobbox.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: blobbox.cpp (Formerly blobnbox.c) * Description: Code for the textord blob class. - * Author: Ray Smith - * Created: Thu Jul 30 09:08:51 BST 1992 + * Author: Ray Smith + * Created: Thu Jul 30 09:08:51 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccstruct/boxread.cpp b/ccstruct/boxread.cpp index f4aedca5b3..fee0aa9aef 100644 --- a/ccstruct/boxread.cpp +++ b/ccstruct/boxread.cpp @@ -34,8 +34,7 @@ FILE* OpenBoxFile(const STRING& fname) { STRING filename = BoxFileName(fname); FILE* box_file = NULL; if (!(box_file = fopen(filename.string(), "rb"))) { - CANTOPENFILE.error("read_next_box", TESSEXIT, - "Can't open box file %s", + CANTOPENFILE.error("read_next_box", TESSEXIT, "Can't open box file %s", filename.string()); } return box_file; diff --git a/ccstruct/boxword.h b/ccstruct/boxword.h index 742bbb8e4d..c1fab068bb 100644 --- a/ccstruct/boxword.h +++ b/ccstruct/boxword.h @@ -82,9 +82,7 @@ class BoxWord { const TBOX& bounding_box() const { return bbox_; } - int length() const { - return length_; - } + int length() const { return length_; } const TBOX& BlobBox(int index) const { return boxes_[index]; } diff --git a/ccstruct/coutln.cpp b/ccstruct/coutln.cpp index bc2b119d8c..238272d2c6 100644 --- a/ccstruct/coutln.cpp +++ b/ccstruct/coutln.cpp @@ -48,9 +48,9 @@ ICOORD C_OUTLINE::step_coords[4] = { * @param length length of loop */ -C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left, - ICOORD top_right, inT16 length) - : box (bot_left, top_right), start (startpt->pos), offsets(NULL) { +C_OUTLINE::C_OUTLINE(CRACKEDGE* startpt, ICOORD bot_left, ICOORD top_right, + inT16 length) + : box(bot_left, top_right), start(startpt->pos), offsets(NULL) { inT16 stepindex; //index to step CRACKEDGE *edgept; //current point @@ -71,7 +71,6 @@ C_OUTLINE::C_OUTLINE (CRACKEDGE * startpt, ICOORD bot_left, } } - /** * @name C_OUTLINE::C_OUTLINE * @@ -139,7 +138,7 @@ inT16 length //length of loop * @param rotation rotate to coord */ -C_OUTLINE::C_OUTLINE(C_OUTLINE *srcline, FCOORD rotation) : offsets(NULL) { +C_OUTLINE::C_OUTLINE(C_OUTLINE* srcline, FCOORD rotation) : offsets(NULL) { TBOX new_box; //easy bounding inT16 stepindex; //index to step 
inT16 dirdiff; //direction change @@ -300,7 +299,6 @@ inT32 C_OUTLINE::perimeter() const { return total_steps; } - /** * @name C_OUTLINE::outer_area * @@ -332,7 +330,6 @@ inT32 C_OUTLINE::outer_area() const { return total; } - /** * @name C_OUTLINE::count_transitions * @@ -459,7 +456,6 @@ inT32 C_OUTLINE::count_transitions(inT32 threshold) { return total; } - /** * @name C_OUTLINE::operator< * @@ -468,8 +464,7 @@ inT32 C_OUTLINE::count_transitions(inT32 threshold) { */ BOOL8 -C_OUTLINE::operator< (const C_OUTLINE & other) const -{ +C_OUTLINE::operator<(const C_OUTLINE& other) const { inT16 count = 0; //winding count ICOORD pos; //position of point inT32 stepindex; //index to cstep @@ -495,7 +490,6 @@ C_OUTLINE::operator< (const C_OUTLINE & other) const return count != 0; } - /** * @name C_OUTLINE::winding_number * @@ -534,7 +528,6 @@ inT16 C_OUTLINE::winding_number(ICOORD point) const { return count; //winding number } - /** * C_OUTLINE::turn_direction * @@ -563,7 +556,6 @@ inT16 C_OUTLINE::turn_direction() const { //winding number return count; //winding number } - /** * @name C_OUTLINE::reverse * @@ -586,7 +578,6 @@ void C_OUTLINE::reverse() { //reverse drection } } - /** * @name C_OUTLINE::move * @@ -661,14 +652,27 @@ static void ComputeGradient(const l_uint32* data, int wpl, int x, int y, int width, int height, ICOORD* gradient) { const l_uint32* line = data + y * wpl; - int pix_x_y = x < width && y < height ? - GET_DATA_BYTE(const_cast (reinterpret_cast(line)), x) : 255; - int pix_x_prevy = x < width && y > 0 ? - GET_DATA_BYTE(const_cast (reinterpret_cast(line - wpl)), x) : 255; - int pix_prevx_prevy = x > 0 && y > 0 ? - GET_DATA_BYTE(const_cast (reinterpret_cast(line - wpl)), x - 1) : 255; - int pix_prevx_y = x > 0 && y < height ? - GET_DATA_BYTE(const_cast (reinterpret_cast(line)), x - 1) : 255; + int pix_x_y = + x < width && y < height + ? GET_DATA_BYTE( + const_cast(reinterpret_cast(line)), x) + : 255; + int pix_x_prevy = + x < width && y > 0 + ? 
GET_DATA_BYTE( + const_cast(reinterpret_cast(line - wpl)), x) + : 255; + int pix_prevx_prevy = + x > 0 && y > 0 + ? GET_DATA_BYTE( + const_cast(reinterpret_cast(line - wpl)), + x - 1) + : 255; + int pix_prevx_y = + x > 0 && y < height + ? GET_DATA_BYTE( + const_cast(reinterpret_cast(line)), x - 1) + : 255; gradient->set_x(pix_x_y + pix_x_prevy - (pix_prevx_y + pix_prevx_prevy)); gradient->set_y(pix_x_prevy + pix_prevx_prevy - (pix_x_y + pix_prevx_y)); } @@ -684,8 +688,10 @@ static bool EvaluateVerticalDiff(const l_uint32* data, int wpl, int diff_sign, if (y <= 0 || y >= height) return false; const l_uint32* line = data + y * wpl; - int pixel1 = GET_DATA_BYTE(const_cast (reinterpret_cast(line - wpl)), x); - int pixel2 = GET_DATA_BYTE(const_cast (reinterpret_cast(line)), x); + int pixel1 = GET_DATA_BYTE( + const_cast(reinterpret_cast(line - wpl)), x); + int pixel2 = + GET_DATA_BYTE(const_cast(reinterpret_cast(line)), x); int diff = (pixel2 - pixel1) * diff_sign; if (diff > *best_diff) { *best_diff = diff; @@ -705,8 +711,10 @@ static bool EvaluateHorizontalDiff(const l_uint32* line, int diff_sign, int* best_diff, int* best_sum, int* best_x) { if (x <= 0 || x >= width) return false; - int pixel1 = GET_DATA_BYTE(const_cast (reinterpret_cast(line)), x - 1); - int pixel2 = GET_DATA_BYTE(const_cast (reinterpret_cast(line)), x); + int pixel1 = GET_DATA_BYTE( + const_cast(reinterpret_cast(line)), x - 1); + int pixel2 = + GET_DATA_BYTE(const_cast(reinterpret_cast(line)), x); int diff = (pixel2 - pixel1) * diff_sign; if (diff > *best_diff) { *best_diff = diff; @@ -954,8 +962,7 @@ void C_OUTLINE::render_outline(int left, int top, Pix* pix) const { */ #ifndef GRAPHICS_DISABLED -void C_OUTLINE::plot(ScrollView* window, - ScrollView::Color colour) const { +void C_OUTLINE::plot(ScrollView* window, ScrollView::Color colour) const { inT16 stepindex; // index to cstep ICOORD pos; // current position DIR128 stepdir; // direction of step @@ -1016,7 +1023,6 @@ void 
C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour, } #endif - /** * @name C_OUTLINE::operator= * @@ -1024,7 +1030,7 @@ void C_OUTLINE::plot_normed(const DENORM& denorm, ScrollView::Color colour, * @param source assign from this */ -C_OUTLINE & C_OUTLINE::operator= (const C_OUTLINE & source) { +C_OUTLINE& C_OUTLINE::operator=(const C_OUTLINE& source) { box = source.box; start = source.start; if (steps != NULL) diff --git a/ccstruct/matrix.h b/ccstruct/matrix.h index e13ef31899..56d261cad7 100644 --- a/ccstruct/matrix.h +++ b/ccstruct/matrix.h @@ -1,7 +1,9 @@ /* -*-C-*- ****************************************************************************** * - * File: matrix.h (Formerly matrix.h) + * File: matrix.h (Formerly matrix.h) + * Description: Generic 2-d array/matrix and banded triangular matrix class. + * Author: Ray Smith * Description: Ratings matrix code. (Used by associator) * Author: Mark Seaman, OCR Technology * Created: Wed May 16 13:22:06 1990 diff --git a/ccstruct/mod128.cpp b/ccstruct/mod128.cpp index ee4aa6c3c6..17776a2783 100644 --- a/ccstruct/mod128.cpp +++ b/ccstruct/mod128.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: mod128.c (Formerly dir128.c) * Description: Code to convert a DIR128 to an ICOORD. - * Author: Ray Smith - * Created: Tue Oct 22 11:56:09 BST 1991 + * Author: Ray Smith + * Created: Tue Oct 22 11:56:09 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccstruct/mod128.h b/ccstruct/mod128.h index 592264ba62..9b31d83a64 100644 --- a/ccstruct/mod128.h +++ b/ccstruct/mod128.h @@ -1,8 +1,8 @@ /********************************************************************** * File: mod128.h (Formerly dir128.h) * Description: Header for class which implements modulo arithmetic. 
- * Author: Ray Smith - * Created: Tue Mar 26 17:48:13 GMT 1991 + * Author: Ray Smith + * Created: Tue Mar 26 17:48:13 GMT 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccstruct/otsuthr.cpp b/ccstruct/otsuthr.cpp index 071b0d2345..ee6c739928 100644 --- a/ccstruct/otsuthr.cpp +++ b/ccstruct/otsuthr.cpp @@ -51,23 +51,16 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height, // only use opencl if compiled w/ OpenCL and selected device is opencl #ifdef USE_OPENCL - // all of channel 0 then all of channel 1... - int *histogramAllChannels = new int[kHistogramSize * num_channels]; + // all of channel 0 then all of channel 1... + int* histogramAllChannels = new int[kHistogramSize * num_channels]; - // Calculate Histogram on GPU - OpenclDevice od; - if (od.selectedDeviceIsOpenCL() && - (num_channels == 1 || num_channels == 4) && top == 0 && left == 0 ) { - od.HistogramRectOCL( - (const unsigned char*)pixGetData(src_pix), - num_channels, - pixGetWpl(src_pix) * 4, - left, - top, - width, - height, - kHistogramSize, - histogramAllChannels); + // Calculate Histogram on GPU + OpenclDevice od; + if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && + top == 0 && left == 0) { + od.HistogramRectOCL((const unsigned char*)pixGetData(src_pix), num_channels, + pixGetWpl(src_pix) * 4, left, top, width, height, + kHistogramSize, histogramAllChannels); // Calculate Threshold from Histogram on cpu for (int ch = 0; ch < num_channels; ++ch) { @@ -143,7 +136,6 @@ int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height, delete[] histogramAllChannels; #endif // USE_OPENCL - if (!any_good_hivalue) { // Use the best of the ones that were not good enough. 
(*hi_values)[best_hi_index] = best_hi_value; diff --git a/ccstruct/pageres.h b/ccstruct/pageres.h index 7329bc89ea..fc84d4d0ea 100644 --- a/ccstruct/pageres.h +++ b/ccstruct/pageres.h @@ -1,7 +1,7 @@ /********************************************************************** * File: pageres.h (Formerly page_res.h) * Description: Results classes used by control.c - * Author: Phil Cheatle + * Author: Phil Cheatle * Created: Tue Sep 22 08:42:49 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. @@ -327,7 +327,7 @@ class WERD_RES : public ELIST_LINK { } // Deep copies everything except the ratings MATRIX. // To get that use deep_copy below. - WERD_RES(const WERD_RES &source) : ELIST_LINK(source) { + WERD_RES(const WERD_RES& source) : ELIST_LINK(source) { InitPointers(); *this = source; // see operator= } @@ -630,7 +630,7 @@ class WERD_RES : public ELIST_LINK { static WERD_RES* deep_copy(const WERD_RES* src) { WERD_RES* result = new WERD_RES(*src); // That didn't copy the ratings, but we want a copy if there is one to - // begin width. + // begin with. 
if (src->ratings != NULL) result->ratings = src->ratings->DeepCopy(); return result; diff --git a/ccstruct/pdblock.h b/ccstruct/pdblock.h index b64eff36d0..e9139f2ac5 100644 --- a/ccstruct/pdblock.h +++ b/ccstruct/pdblock.h @@ -29,90 +29,76 @@ struct Pix; CLISTIZEH (PDBLK) ///page block -class PDBLK -{ +class PDBLK { friend class BLOCK_RECT_IT; //< block iterator - public: - ///empty constructor - PDBLK() { - hand_poly = NULL; - index_ = 0; - } - ///simple constructor - PDBLK(inT16 xmin, //< bottom left - inT16 ymin, - inT16 xmax, //< top right - inT16 ymax); - - ///set vertex lists - ///@param left list of left vertices - ///@param right list of right vertices - void set_sides(ICOORDELT_LIST *left, - ICOORDELT_LIST *right); - - ///destructor - ~PDBLK () { - if (hand_poly) delete hand_poly; - } - - POLY_BLOCK *poly_block() const { - return hand_poly; - } - ///set the poly block - void set_poly_block(POLY_BLOCK *blk) { - hand_poly = blk; - } - ///get box - void bounding_box(ICOORD &bottom_left, //bottom left - ICOORD &top_right) const { //topright - bottom_left = box.botleft (); - top_right = box.topright (); - } - ///get real box - const TBOX &bounding_box() const { - return box; - } - - int index() const { - return index_; - } - void set_index(int value) { - index_ = value; - } - - ///is pt inside block - BOOL8 contains(ICOORD pt); - - /// reposition block - void move(const ICOORD vec); // by vector - - // Returns a binary Pix mask with a 1 pixel for every pixel within the - // block. Rotates the coordinate system by rerotation prior to rendering. - // If not NULL, mask_box is filled with the position box of the returned - // mask image. 
- Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box); - - #ifndef GRAPHICS_DISABLED - ///draw histogram - ///@param window window to draw in - ///@param serial serial number - ///@param colour colour to draw in - void plot(ScrollView* window, - inT32 serial, - ScrollView::Color colour); - #endif // GRAPHICS_DISABLED - - ///assignment - ///@param source from this - PDBLK & operator= (const PDBLK & source); - - protected: - POLY_BLOCK *hand_poly; //< weird as well - ICOORDELT_LIST leftside; //< left side vertices - ICOORDELT_LIST rightside; //< right side vertices - TBOX box; //< bounding box - int index_; //< Serial number of this block. + public: + /// empty constructor + PDBLK() { + hand_poly = NULL; + index_ = 0; + } + /// simple constructor + PDBLK(inT16 xmin, //< bottom left + inT16 ymin, + inT16 xmax, //< top right + inT16 ymax); + + /// set vertex lists + ///@param left list of left vertices + ///@param right list of right vertices + void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right); + + /// destructor + ~PDBLK() { + if (hand_poly) delete hand_poly; + } + + POLY_BLOCK *poly_block() const { return hand_poly; } + /// set the poly block + void set_poly_block(POLY_BLOCK *blk) { hand_poly = blk; } + /// get box + void bounding_box(ICOORD &bottom_left, // bottom left + ICOORD &top_right) const { // topright + bottom_left = box.botleft(); + top_right = box.topright(); + } + /// get real box + const TBOX &bounding_box() const { return box; } + + int index() const { return index_; } + void set_index(int value) { index_ = value; } + + /// is pt inside block + BOOL8 contains(ICOORD pt); + + /// reposition block + void move(const ICOORD vec); // by vector + + // Returns a binary Pix mask with a 1 pixel for every pixel within the + // block. Rotates the coordinate system by rerotation prior to rendering. + // If not NULL, mask_box is filled with the position box of the returned + // mask image. 
+ Pix *render_mask(const FCOORD &rerotation, TBOX *mask_box); + +#ifndef GRAPHICS_DISABLED + /// draw histogram + ///@param window window to draw in + ///@param serial serial number + ///@param colour colour to draw in + void plot(ScrollView *window, inT32 serial, ScrollView::Color colour); +#endif // GRAPHICS_DISABLED + + /// assignment + ///@param source from this + PDBLK &operator=(const PDBLK &source); + + protected: + POLY_BLOCK *hand_poly; //< weird as well + ICOORDELT_LIST leftside; //< left side vertices + ICOORDELT_LIST rightside; //< right side vertices + TBOX box; //< bounding box + int index_; //< Serial number of this block. }; class DLLSYM BLOCK_RECT_IT //rectangle iterator diff --git a/ccstruct/polyaprx.cpp b/ccstruct/polyaprx.cpp index 81b8500a0e..7597349543 100644 --- a/ccstruct/polyaprx.cpp +++ b/ccstruct/polyaprx.cpp @@ -214,7 +214,7 @@ EDGEPT edgepts[] //output is array void fix2( //polygonal approx EDGEPT *start, /*loop to approimate */ int area) { - EDGEPT *edgept; /*current point */ + EDGEPT *edgept; /*current point */ EDGEPT *edgept1; EDGEPT *loopstart; /*modified start of loop */ EDGEPT *linestart; /*start of line segment */ diff --git a/ccstruct/polyblk.cpp b/ccstruct/polyblk.cpp index e0a455905f..b5ca2e1212 100644 --- a/ccstruct/polyblk.cpp +++ b/ccstruct/polyblk.cpp @@ -1,7 +1,7 @@ /********************************************************************** * File: polyblk.c (Formerly poly_block.c) * Description: Polygonal blocks - * Author: Sheelagh Lloyd? + * Author: Sheelagh Lloyd? * Created: * * (C) Copyright 1993, Hewlett-Packard Ltd. diff --git a/ccstruct/quspline.cpp b/ccstruct/quspline.cpp index f50cfe50ee..82107e1e25 100644 --- a/ccstruct/quspline.cpp +++ b/ccstruct/quspline.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: quspline.cpp (Formerly qspline.c) * Description: Code for the QSPLINE class. 
- * Author: Ray Smith - * Created: Tue Oct 08 17:16:12 BST 1991 + * Author: Ray Smith + * Created: Tue Oct 08 17:16:12 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccstruct/ratngs.h b/ccstruct/ratngs.h index 31b27cfb37..446dfc6c5d 100644 --- a/ccstruct/ratngs.h +++ b/ccstruct/ratngs.h @@ -288,7 +288,8 @@ class WERD_CHOICE : public ELIST_LINK { src_certainty, src_permuter); } WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset); - WERD_CHOICE(const WERD_CHOICE &word) : ELIST_LINK(word), unicharset_(word.unicharset_) { + WERD_CHOICE(const WERD_CHOICE &word) + : ELIST_LINK(word), unicharset_(word.unicharset_) { this->init(word.length()); this->operator=(word); } diff --git a/ccstruct/rect.cpp b/ccstruct/rect.cpp index 22417485db..4a9fe00b34 100644 --- a/ccstruct/rect.cpp +++ b/ccstruct/rect.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: rect.c (Formerly box.c) * Description: Bounding box class definition. - * Author: Phil Cheatle - * Created: Wed Oct 16 15:18:45 BST 1991 + * Author: Phil Cheatle + * Created: Wed Oct 16 15:18:45 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,10 +29,10 @@ * **********************************************************************/ -TBOX::TBOX( //constructor - const ICOORD pt1, //one corner - const ICOORD pt2 //the other corner - ) { +TBOX::TBOX( // constructor + const ICOORD pt1, // one corner + const ICOORD pt2 // the other corner + ) { if (pt1.x () <= pt2.x ()) { if (pt1.y () <= pt2.y ()) { bot_left = pt1; diff --git a/ccstruct/rect.h b/ccstruct/rect.h index d9b90642f4..f31247a1af 100644 --- a/ccstruct/rect.h +++ b/ccstruct/rect.h @@ -1,8 +1,8 @@ /********************************************************************** * File: rect.h (Formerly box.h) * Description: Bounding box class definition. 
- * Author: Phil Cheatle - * Created: Wed Oct 16 15:18:45 BST 1991 + * Author: Phil Cheatle + * Created: Wed Oct 16 15:18:45 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -307,9 +307,9 @@ class DLLSYM TBOX { // bounding box * **********************************************************************/ -inline TBOX::TBOX( // constructor - const FCOORD pt // floating centre - ) { +inline TBOX::TBOX( // constructor + const FCOORD pt // floating centre + ) { bot_left = ICOORD ((inT16) floor (pt.x ()), (inT16) floor (pt.y ())); top_right = ICOORD ((inT16) ceil (pt.x ()), (inT16) ceil (pt.y ())); } diff --git a/ccstruct/rejctmap.h b/ccstruct/rejctmap.h index d945dda1fa..009ba58a78 100644 --- a/ccstruct/rejctmap.h +++ b/ccstruct/rejctmap.h @@ -1,8 +1,8 @@ /********************************************************************** * File: rejctmap.h (Formerly rejmap.h) * Description: REJ and REJMAP class functions. - * Author: Phil Cheatle - * Created: Thu Jun 9 13:46:38 BST 1994 + * Author: Phil Cheatle + * Created: Thu Jun 9 13:46:38 BST 1994 * * (C) Copyright 1994, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -48,46 +48,45 @@ OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!! 
#include "bits16.h" #include "params.h" -enum REJ_FLAGS -{ +enum REJ_FLAGS { /* Reject modes which are NEVER overridden */ - R_TESS_FAILURE, // PERM Tess didn't classify - R_SMALL_XHT, // PERM Xht too small - R_EDGE_CHAR, // PERM Too close to edge of image - R_1IL_CONFLICT, // PERM 1Il confusion - R_POSTNN_1IL, // PERM 1Il unrejected by NN - R_REJ_CBLOB, // PERM Odd blob - R_MM_REJECT, // PERM Matrix match rejection (m's) - R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend + R_TESS_FAILURE, // PERM Tess didn't classify + R_SMALL_XHT, // PERM Xht too small + R_EDGE_CHAR, // PERM Too close to edge of image + R_1IL_CONFLICT, // PERM 1Il confusion + R_POSTNN_1IL, // PERM 1Il unrejected by NN + R_REJ_CBLOB, // PERM Odd blob + R_MM_REJECT, // PERM Matrix match rejection (m's) + R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend /* Initial reject modes (pre NN_ACCEPT) */ - R_POOR_MATCH, // TEMP Ray's original heuristic (Not used) - R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD - R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD - R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD + R_POOR_MATCH, // TEMP Ray's original heuristic (Not used) + R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD + R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD + R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */ - R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop - R_DUBIOUS, // TEMP Post NN dodgy chars - R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN - R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest - R_XHT_FIXUP, // TEMP Xht tests unsure + R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop + R_DUBIOUS, // TEMP Post NN dodgy chars + R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN + R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest + R_XHT_FIXUP, // TEMP Xht tests unsure /* Reject modes generated after MM_ACCEPT but 
before QUALITY_ACCEPT */ - R_BAD_QUALITY, // TEMP Quality metrics bad for WERD + R_BAD_QUALITY, // TEMP Quality metrics bad for WERD /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ accep*/ - R_DOC_REJ, // TEMP Document rejection - R_BLOCK_REJ, // TEMP Block rejection - R_ROW_REJ, // TEMP Row rejection - R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space + R_DOC_REJ, // TEMP Document rejection + R_BLOCK_REJ, // TEMP Block rejection + R_ROW_REJ, // TEMP Row rejection + R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space /* Accept modes which occur between the above rejection groups */ - R_NN_ACCEPT, //NN acceptance - R_HYPHEN_ACCEPT, //Hyphen acceptance - R_MM_ACCEPT, //Matrix match acceptance - R_QUALITY_ACCEPT, //Accept word in good quality doc - R_MINIMAL_REJ_ACCEPT //Accept EVERYTHING except tess failures + R_NN_ACCEPT, // NN acceptance + R_HYPHEN_ACCEPT, // Hyphen acceptance + R_MM_ACCEPT, // Matrix match acceptance + R_QUALITY_ACCEPT, // Accept word in good quality doc + R_MINIMAL_REJ_ACCEPT // Accept EVERYTHING except tess failures }; /* REJECT MAP VALUES */ diff --git a/ccstruct/statistc.cpp b/ccstruct/statistc.cpp index 39d5edd180..8b1ba8c9a1 100644 --- a/ccstruct/statistc.cpp +++ b/ccstruct/statistc.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: statistc.c (Formerly stats.c) * Description: Simple statistical package for integer values. - * Author: Ray Smith - * Created: Mon Feb 04 16:56:05 GMT 1991 + * Author: Ray Smith + * Created: Mon Feb 04 16:56:05 GMT 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -215,7 +215,6 @@ inT32 STATS::min_bucket() const { // Find min return rangemin_ + min; } - /********************************************************************** * STATS::max_bucket * diff --git a/ccutil/clst.cpp b/ccutil/clst.cpp index fbbb561fad..52caadf38f 100644 --- a/ccutil/clst.cpp +++ b/ccutil/clst.cpp @@ -26,7 +26,7 @@ **********************************************************************/ /*********************************************************************** - * CLIST::internal_deep_clear + * CLIST::internal_deep_clear * * Used by the "deep_clear" member function of derived list * classes to destroy all the elements on the list. @@ -56,9 +56,8 @@ void (*zapper) (void *)) { //ptr to zapper functn } } - /*********************************************************************** - * CLIST::shallow_clear + * CLIST::shallow_clear * * Used by the destructor and the "shallow_clear" member function of derived * list classes to destroy the list. @@ -83,7 +82,7 @@ void CLIST::shallow_clear() { //destroy all links } /*********************************************************************** - * CLIST::assign_to_sublist + * CLIST::assign_to_sublist * * The list is set to a sublist of another list. "This" list must be empty * before this function is invoked. 
The two iterators passed must refer to @@ -107,9 +106,8 @@ void CLIST::assign_to_sublist( //to this list last = start_it->extract_sublist (end_it); } - /*********************************************************************** - * CLIST::length + * CLIST::length * * Return count of elements on list **********************************************************************/ @@ -123,9 +121,8 @@ inT32 CLIST::length() const { //count elements return count; } - /*********************************************************************** - * CLIST::sort + * CLIST::sort * * Sort elements on list **********************************************************************/ @@ -239,7 +236,7 @@ void CLIST::set_subtract(int comparator(const void*, const void*), **********************************************************************/ /*********************************************************************** - * CLIST_ITERATOR::forward + * CLIST_ITERATOR::forward * * Move the iterator to the next element of the list. * REMEMBER: ALL LISTS ARE CIRCULAR. @@ -276,9 +273,8 @@ void *CLIST_ITERATOR::forward() { return current->data; } - /*********************************************************************** - * CLIST_ITERATOR::data_relative + * CLIST_ITERATOR::data_relative * * Return the data pointer to the element "offset" elements from current. * "offset" must not be less than -1. @@ -312,9 +308,8 @@ void *CLIST_ITERATOR::data_relative( //get data + or - ... return ptr->data; } - /*********************************************************************** - * CLIST_ITERATOR::move_to_last() + * CLIST_ITERATOR::move_to_last() * * Move current so that it is set to the end of the list. * Return data just in case anyone wants it. 
@@ -336,9 +331,8 @@ void *CLIST_ITERATOR::move_to_last() { return current->data; } - /*********************************************************************** - * CLIST_ITERATOR::exchange() + * CLIST_ITERATOR::exchange() * * Given another iterator, whose current element is a different element on * the same list list OR an element of another list, exchange the two current @@ -434,9 +428,8 @@ void CLIST_ITERATOR::exchange( //positions of 2 link other_it->current = old_current; } - /*********************************************************************** - * CLIST_ITERATOR::extract_sublist() + * CLIST_ITERATOR::extract_sublist() * * This is a private member, used only by CLIST::assign_to_sublist. * Given another iterator for the same list, extract the links from THIS to @@ -478,7 +471,7 @@ CLIST_LINK *CLIST_ITERATOR::extract_sublist( //from temp_it.mark_cycle_pt (); do { //walk sublist - if (temp_it.cycled_list ()) //can't find end pt + if (temp_it.cycled_list()) // can't find end pt BAD_SUBLIST.error ("CLIST_ITERATOR.extract_sublist", ABORT, NULL); if (temp_it.at_last ()) { diff --git a/ccutil/clst.h b/ccutil/clst.h index a5a42a6e40..f93d75afcb 100644 --- a/ccutil/clst.h +++ b/ccutil/clst.h @@ -28,9 +28,9 @@ class CLIST_ITERATOR; /********************************************************************** - * CLASS - CLIST_LINK + * CLASS - CLIST_LINK * - * Generic link class for singly linked CONS cell lists + * Generic link class for singly linked CONS cell lists * * Note: No destructor - elements are assumed to be destroyed EITHER after * they have been extracted from a list OR by the CLIST destructor which @@ -50,13 +50,13 @@ class DLLSYM CLIST_LINK data = next = NULL; } - CLIST_LINK( //copy constructor - const CLIST_LINK &) { //don't copy link + CLIST_LINK( // copy constructor + const CLIST_LINK &) { // don't copy link data = next = NULL; } - void operator= ( //don't copy links - const CLIST_LINK &) { + void operator=( // don't copy links + const CLIST_LINK &) { data = 
next = NULL; } }; @@ -89,8 +89,8 @@ class DLLSYM CLIST void internal_deep_clear ( //destroy all links void (*zapper) (void *)); //ptr to zapper functn - void shallow_clear(); //clear list but don't - //delete data elements + void shallow_clear(); // clear list but don't + // delete data elements bool empty() const { //is list empty? return !last; @@ -136,9 +136,10 @@ class DLLSYM CLIST }; /*********************************************************************** - * CLASS - CLIST_ITERATOR + * CLASS - CLIST_ITERATOR * - * Generic iterator class for singly linked lists with embedded links + * Generic iterator class for singly linked lists with embedded + *links **********************************************************************/ class DLLSYM CLIST_ITERATOR @@ -231,8 +232,8 @@ class DLLSYM CLIST_ITERATOR BOOL8 cycled_list(); //Completed a cycle? - void add_to_end( //add at end & - void *new_data); //don't move + void add_to_end( // add at end & + void *new_data); // don't move void exchange( //positions of 2 links CLIST_ITERATOR *other_it); //other iterator @@ -246,7 +247,7 @@ class DLLSYM CLIST_ITERATOR }; /*********************************************************************** - * CLIST_ITERATOR::set_to_list + * CLIST_ITERATOR::set_to_list * * (Re-)initialise the iterator to point to the start of the list_to_iterate * over. 
@@ -270,9 +271,8 @@ inline void CLIST_ITERATOR::set_to_list( //change list ex_current_was_cycle_pt = FALSE; } - /*********************************************************************** - * CLIST_ITERATOR::CLIST_ITERATOR + * CLIST_ITERATOR::CLIST_ITERATOR * * CONSTRUCTOR - set iterator to specified list; **********************************************************************/ @@ -281,9 +281,8 @@ inline CLIST_ITERATOR::CLIST_ITERATOR(CLIST *list_to_iterate) { set_to_list(list_to_iterate); } - /*********************************************************************** - * CLIST_ITERATOR::add_after_then_move + * CLIST_ITERATOR::add_after_then_move * * Add a new element to the list after the current element and move the * iterator to the new element. @@ -329,9 +328,8 @@ inline void CLIST_ITERATOR::add_after_then_move( // element to add current = new_element; } - /*********************************************************************** - * CLIST_ITERATOR::add_after_stay_put + * CLIST_ITERATOR::add_after_stay_put * * Add a new element to the list after the current element but do not move * the iterator to the new element. @@ -380,9 +378,8 @@ inline void CLIST_ITERATOR::add_after_stay_put( // element to add } } - /*********************************************************************** - * CLIST_ITERATOR::add_before_then_move + * CLIST_ITERATOR::add_before_then_move * * Add a new element to the list before the current element and move the * iterator to the new element. @@ -425,9 +422,8 @@ inline void CLIST_ITERATOR::add_before_then_move( // element to add current = new_element; } - /*********************************************************************** - * CLIST_ITERATOR::add_before_stay_put + * CLIST_ITERATOR::add_before_stay_put * * Add a new element to the list before the current element but don't move the * iterator to the new element. 
@@ -471,11 +467,11 @@ inline void CLIST_ITERATOR::add_before_stay_put( // element to add } } - /*********************************************************************** - * CLIST_ITERATOR::add_list_after + * CLIST_ITERATOR::add_list_after * - * Insert another list to this list after the current element but don't move the + * Insert another list to this list after the current element but don't move + *the * iterator. **********************************************************************/ @@ -518,9 +514,8 @@ inline void CLIST_ITERATOR::add_list_after(CLIST *list_to_add) { } } - /*********************************************************************** - * CLIST_ITERATOR::add_list_before + * CLIST_ITERATOR::add_list_before * * Insert another list to this list before the current element. Move the * iterator to the start of the inserted elements @@ -563,9 +558,8 @@ inline void CLIST_ITERATOR::add_list_before(CLIST *list_to_add) { } } - /*********************************************************************** - * CLIST_ITERATOR::extract + * CLIST_ITERATOR::extract * * Do extraction by removing current from the list, deleting the cons cell * and returning the data to the caller, but NOT updating the iterator. (So @@ -606,9 +600,8 @@ inline void *CLIST_ITERATOR::extract() { return extracted_data; } - /*********************************************************************** - * CLIST_ITERATOR::move_to_first() + * CLIST_ITERATOR::move_to_first() * * Move current so that it is set to the start of the list. * Return data just in case anyone wants it. @@ -626,9 +619,8 @@ inline void *CLIST_ITERATOR::move_to_first() { return current != NULL ? current->data : NULL; } - /*********************************************************************** - * CLIST_ITERATOR::mark_cycle_pt() + * CLIST_ITERATOR::mark_cycle_pt() * * Remember the current location so that we can tell whether we've returned * to this point later. 
@@ -651,9 +643,8 @@ inline void CLIST_ITERATOR::mark_cycle_pt() { started_cycling = FALSE; } - /*********************************************************************** - * CLIST_ITERATOR::at_first() + * CLIST_ITERATOR::at_first() * * Are we at the start of the list? * @@ -671,9 +662,8 @@ inline BOOL8 CLIST_ITERATOR::at_first() { !ex_current_was_last)); //first and last } - /*********************************************************************** - * CLIST_ITERATOR::at_last() + * CLIST_ITERATOR::at_last() * * Are we at the end of the list? * @@ -691,9 +681,8 @@ inline BOOL8 CLIST_ITERATOR::at_last() { ex_current_was_last)); //first and last } - /*********************************************************************** - * CLIST_ITERATOR::cycled_list() + * CLIST_ITERATOR::cycled_list() * * Have we returned to the cycle_pt since it was set? * @@ -709,9 +698,8 @@ inline BOOL8 CLIST_ITERATOR::cycled_list() { } - /*********************************************************************** - * CLIST_ITERATOR::length() + * CLIST_ITERATOR::length() * * Return the length of the list * @@ -726,9 +714,8 @@ inline inT32 CLIST_ITERATOR::length() { return list->length (); } - /*********************************************************************** - * CLIST_ITERATOR::sort() + * CLIST_ITERATOR::sort() * * Sort the elements of the list, then reposition at the start. * @@ -747,9 +734,8 @@ const void *, const void *)) { move_to_first(); } - /*********************************************************************** - * CLIST_ITERATOR::add_to_end + * CLIST_ITERATOR::add_to_end * * Add a new element to the end of the list without moving the iterator. 
* This is provided because a single linked list cannot move to the last as @@ -811,7 +797,7 @@ The macro generates: - An element deletion function: CLASSNAME##_c1_zapper - An element copier function: CLASSNAME##_c1_copier - - A CLIST subclass: CLASSNAME##_CLIST + - A CLIST subclass: CLASSNAME##_CLIST - A CLIST_ITERATOR subclass: CLASSNAME##_C_IT @@ -830,114 +816,116 @@ CLISTIZEH is a concatenation of 3 fragments CLISTIZEH_A, CLISTIZEH_B and CLISTIZEH_C. ***********************************************************************/ -#define CLISTIZEH_A( CLASSNAME ) \ - \ -extern DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ -void* link); /*link to delete*/ \ - \ -extern DLLSYM void* CLASSNAME##_c1_copier( /*deep copy a link*/ \ -void* old_element); /*source link */ - -#define CLISTIZEH_B( CLASSNAME ) \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_CLIST \ -* \ -* List class for class CLASSNAME \ -* \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_CLIST : public CLIST \ -{ \ -public: \ - CLASSNAME##_CLIST():CLIST() {} \ - /* constructor */ \ - \ - CLASSNAME##_CLIST( /* don't construct */ \ - const CLASSNAME##_CLIST&) /*by initial assign*/ \ - { DONT_CONSTRUCT_LIST_BY_COPY.error( QUOTE_IT( CLASSNAME##_CLIST ), \ - ABORT, NULL ); } \ - \ -void deep_clear() /* delete elements */ \ - { CLIST::internal_deep_clear( &CLASSNAME##_c1_zapper ); } \ - \ -void operator=( /* prevent assign */ \ - const CLASSNAME##_CLIST&) \ - { DONT_ASSIGN_LISTS.error( QUOTE_IT( CLASSNAME##_CLIST ), \ - ABORT, NULL ); } - -#define CLISTIZEH_C( CLASSNAME ) \ - \ -}; \ - \ - \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_C_IT \ -* \ -* Iterator class for class CLASSNAME##_CLIST \ -* \ -* Note: We don't need to coerce pointers to member functions input \ -* parameters as these are automatically converted to the type of the 
base \ -* type. ("A ptr to a class may be converted to a pointer to a public base \ -* class of that class") \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_C_IT : public CLIST_ITERATOR \ -{ \ -public: \ - CLASSNAME##_C_IT():CLIST_ITERATOR(){} \ - \ - CLASSNAME##_C_IT( \ - CLASSNAME##_CLIST* list):CLIST_ITERATOR(list){} \ - \ - CLASSNAME* data() \ - { return (CLASSNAME*) CLIST_ITERATOR::data(); } \ - \ - CLASSNAME* data_relative( \ - inT8 offset) \ - { return (CLASSNAME*) CLIST_ITERATOR::data_relative( offset ); } \ - \ - CLASSNAME* forward() \ - { return (CLASSNAME*) CLIST_ITERATOR::forward(); } \ - \ - CLASSNAME* extract() \ - { return (CLASSNAME*) CLIST_ITERATOR::extract(); } \ - \ - CLASSNAME* move_to_first() \ - { return (CLASSNAME*) CLIST_ITERATOR::move_to_first(); } \ - \ - CLASSNAME* move_to_last() \ - { return (CLASSNAME*) CLIST_ITERATOR::move_to_last(); } \ -}; +#define CLISTIZEH_A(CLASSNAME) \ + \ + extern DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ + void *link); /*link to delete*/ \ + \ + extern DLLSYM void \ + *CLASSNAME##_c1_copier( /*deep copy a link*/ \ + void *old_element); /*source link */ + +#define CLISTIZEH_B(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASS - \ + *CLASSNAME##_CLIST \ + * \ + * List class for class \ + *CLASSNAME \ + * \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_CLIST : public CLIST { \ + public: \ + CLASSNAME##_CLIST() : CLIST() {} \ + /* constructor */ \ + \ + CLASSNAME##_CLIST( /* don't construct */ \ + const CLASSNAME##_CLIST &) /*by initial assign*/ \ + { \ + DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, \ + NULL); \ + } \ + \ + void deep_clear() /* delete elements */ \ + { \ + CLIST::internal_deep_clear(&CLASSNAME##_c1_zapper); \ + } \ + \ + void operator=(/* prevent assign */ \ + const CLASSNAME##_CLIST &) { \ 
+ DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_CLIST), ABORT, NULL); \ + } -#define CLISTIZEH( CLASSNAME ) \ - \ -CLISTIZEH_A( CLASSNAME ) \ - \ -CLISTIZEH_B( CLASSNAME ) \ - \ -CLISTIZEH_C( CLASSNAME ) +#define CLISTIZEH_C(CLASSNAME) \ + } \ + ; \ + \ + /*********************************************************************** \ + * CLASS - CLASSNAME##_C_IT \ + * \ + * Iterator class for class CLASSNAME##_CLIST \ + * \ + * Note: We don't need to coerce pointers to member functions input \ + * parameters as these are automatically converted to the type of the base \ + * type. ("A ptr to a class may be converted to a pointer to a public base \ + * class of that class") \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_C_IT : public CLIST_ITERATOR { \ + public: \ + CLASSNAME##_C_IT() : CLIST_ITERATOR() {} \ + \ + CLASSNAME##_C_IT(CLASSNAME##_CLIST *list) : CLIST_ITERATOR(list) {} \ + \ + CLASSNAME *data() { return (CLASSNAME *)CLIST_ITERATOR::data(); } \ + \ + CLASSNAME *data_relative(inT8 offset) { \ + return (CLASSNAME *)CLIST_ITERATOR::data_relative(offset); \ + } \ + \ + CLASSNAME *forward() { return (CLASSNAME *)CLIST_ITERATOR::forward(); } \ + \ + CLASSNAME *extract() { return (CLASSNAME *)CLIST_ITERATOR::extract(); } \ + \ + CLASSNAME *move_to_first() { \ + return (CLASSNAME *)CLIST_ITERATOR::move_to_first(); \ + } \ + \ + CLASSNAME *move_to_last() { \ + return (CLASSNAME *)CLIST_ITERATOR::move_to_last(); \ + } \ + }; + +#define CLISTIZEH(CLASSNAME) \ + \ + CLISTIZEH_A(CLASSNAME) \ + \ + CLISTIZEH_B(CLASSNAME) \ + \ + CLISTIZEH_C(CLASSNAME) /*********************************************************************** CLISTIZE( CLASSNAME ) MACRO ***********************************************************************/ -#define CLISTIZE( CLASSNAME ) \ - \ -/*********************************************************************** \ -* CLASSNAME##_c1_zapper \ -* \ -* A function which can delete a CLASSNAME element. 
This is passed to the \ -* generic deep_clear list member function so that when a list is cleared the \ -* elements on the list are properly destroyed from the base class, even \ -* though we don't use a virtual destructor function. \ -**********************************************************************/ \ - \ -DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ -void* link) /*link to delete*/ \ -{ \ -delete (CLASSNAME *) link; \ -} \ +#define CLISTIZE(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASSNAME##_c1_zapper \ + * \ + * A function which can delete a CLASSNAME element. This is passed to the \ + * generic deep_clear list member function so that when a list is cleared \ + *the \ + * elements on the list are properly destroyed from the base class, even \ + * though we don't use a virtual destructor function. \ + **********************************************************************/ \ + \ + DLLSYM void CLASSNAME##_c1_zapper( /*delete a link*/ \ + void *link) /*link to delete*/ \ + { \ + delete (CLASSNAME *)link; \ + } #endif diff --git a/ccutil/elst.cpp b/ccutil/elst.cpp index 8ad999b5ba..2d2c9ad65d 100644 --- a/ccutil/elst.cpp +++ b/ccutil/elst.cpp @@ -26,7 +26,7 @@ **********************************************************************/ /*********************************************************************** - * ELIST::internal_clear + * ELIST::internal_clear * * Used by the destructor and the "clear" member function of derived list * classes to destroy all the elements on the list. @@ -57,7 +57,7 @@ void (*zapper) (ELIST_LINK *)) { } /*********************************************************************** - * ELIST::assign_to_sublist + * ELIST::assign_to_sublist * * The list is set to a sublist of another list. "This" list must be empty * before this function is invoked. 
The two iterators passed must refer to @@ -81,9 +81,8 @@ void ELIST::assign_to_sublist( //to this list last = start_it->extract_sublist (end_it); } - /*********************************************************************** - * ELIST::length + * ELIST::length * * Return count of elements on list **********************************************************************/ @@ -97,9 +96,8 @@ inT32 ELIST::length() const { // count elements return count; } - /*********************************************************************** - * ELIST::sort + * ELIST::sort * * Sort elements on list * NB If you don't like the const declarations in the comparator, coerce yours: @@ -187,7 +185,7 @@ ELIST_LINK *ELIST::add_sorted_and_find( **********************************************************************/ /*********************************************************************** - * ELIST_ITERATOR::forward + * ELIST_ITERATOR::forward * * Move the iterator to the next element of the list. * REMEMBER: ALL LISTS ARE CIRCULAR. @@ -224,9 +222,8 @@ ELIST_LINK *ELIST_ITERATOR::forward() { return current; } - /*********************************************************************** - * ELIST_ITERATOR::data_relative + * ELIST_ITERATOR::data_relative * * Return the data pointer to the element "offset" elements from current. * "offset" must not be less than -1. @@ -260,9 +257,8 @@ ELIST_LINK *ELIST_ITERATOR::data_relative( //get data + or - ... return ptr; } - /*********************************************************************** - * ELIST_ITERATOR::move_to_last() + * ELIST_ITERATOR::move_to_last() * * Move current so that it is set to the end of the list. * Return data just in case anyone wants it. 
@@ -281,9 +277,8 @@ ELIST_LINK *ELIST_ITERATOR::move_to_last() { return current; } - /*********************************************************************** - * ELIST_ITERATOR::exchange() + * ELIST_ITERATOR::exchange() * * Given another iterator, whose current element is a different element on * the same list list OR an element of another list, exchange the two current @@ -379,9 +374,8 @@ void ELIST_ITERATOR::exchange( //positions of 2 link other_it->current = old_current; } - /*********************************************************************** - * ELIST_ITERATOR::extract_sublist() + * ELIST_ITERATOR::extract_sublist() * * This is a private member, used only by ELIST::assign_to_sublist. * Given another iterator for the same list, extract the links from THIS to @@ -425,7 +419,7 @@ ELIST_LINK *ELIST_ITERATOR::extract_sublist( //from temp_it.mark_cycle_pt (); do { //walk sublist - if (temp_it.cycled_list ()) //can't find end pt + if (temp_it.cycled_list()) // can't find end pt BAD_SUBLIST.error ("ELIST_ITERATOR.extract_sublist", ABORT, NULL); if (temp_it.at_last ()) { diff --git a/ccutil/elst.h b/ccutil/elst.h index e239577419..d53a7c34f0 100644 --- a/ccutil/elst.h +++ b/ccutil/elst.h @@ -98,8 +98,8 @@ class DLLSYM ELIST_LINK next = NULL; } - void operator= ( //don't copy links - const ELIST_LINK &) { + void operator=( // don't copy links + const ELIST_LINK &) { next = NULL; } }; @@ -273,8 +273,8 @@ class DLLSYM ELIST_ITERATOR bool cycled_list(); //Completed a cycle? 
- void add_to_end( //add at end & - ELIST_LINK *new_link); //don't move + void add_to_end( // add at end & + ELIST_LINK *new_link); // don't move void exchange( //positions of 2 links ELIST_ITERATOR *other_it); //other iterator @@ -458,7 +458,6 @@ inline void ELIST_ITERATOR::add_before_then_move( // element to add current = new_element; } - /*********************************************************************** * ELIST_ITERATOR::add_before_stay_put * @@ -501,11 +500,11 @@ inline void ELIST_ITERATOR::add_before_stay_put( // element to add } } - /*********************************************************************** * ELIST_ITERATOR::add_list_after * - * Insert another list to this list after the current element but don't move the + * Insert another list to this list after the current element but don't move + *the * iterator. **********************************************************************/ @@ -959,30 +958,29 @@ ELISTIZEH_C( CLASSNAME ) ELISTIZE( CLASSNAME ) MACRO ***********************************************************************/ -#define ELISTIZE(CLASSNAME) \ - \ -/*********************************************************************** \ -* CLASSNAME##_zapper \ -* \ -* A function which can delete a CLASSNAME element. This is passed to the \ -* generic clear list member function so that when a list is cleared the \ -* elements on the list are properly destroyed from the base class, even \ -* though we don't use a virtual destructor function. 
\ -**********************************************************************/ \ - \ -DLLSYM void CLASSNAME##_zapper(ELIST_LINK* link) { \ - delete reinterpret_cast(link); \ -} \ - \ -/* Become a deep copy of src_list*/ \ -void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST* src_list, \ - CLASSNAME* (*copier)(const CLASSNAME*)) { \ - \ - CLASSNAME##_IT from_it(const_cast(src_list)); \ - CLASSNAME##_IT to_it(this); \ - \ - for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ - to_it.add_after_then_move((*copier)(from_it.data())); \ -} +#define ELISTIZE(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASSNAME##_zapper \ + * \ + * A function which can delete a CLASSNAME element. This is passed to the \ + * generic clear list member function so that when a list is cleared the \ + * elements on the list are properly destroyed from the base class, even \ + * though we don't use a virtual destructor function. \ + **********************************************************************/ \ + \ + DLLSYM void CLASSNAME##_zapper(ELIST_LINK *link) { \ + delete reinterpret_cast(link); \ + } \ + \ + /* Become a deep copy of src_list*/ \ + void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST *src_list, \ + CLASSNAME *(*copier)(const CLASSNAME *)) { \ + CLASSNAME##_IT from_it(const_cast(src_list)); \ + CLASSNAME##_IT to_it(this); \ + \ + for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ + to_it.add_after_then_move((*copier)(from_it.data())); \ + } #endif diff --git a/ccutil/elst2.cpp b/ccutil/elst2.cpp index 30cedec17b..0d4960ed49 100644 --- a/ccutil/elst2.cpp +++ b/ccutil/elst2.cpp @@ -27,7 +27,7 @@ **********************************************************************/ /*********************************************************************** - * ELIST2::internal_clear + * ELIST2::internal_clear * * Used by the destructor and the "clear" member function of derived list * classes to 
destroy all the elements on the list. @@ -58,7 +58,7 @@ void (*zapper) (ELIST2_LINK *)) { } /*********************************************************************** - * ELIST2::assign_to_sublist + * ELIST2::assign_to_sublist * * The list is set to a sublist of another list. "This" list must be empty * before this function is invoked. The two iterators passed must refer to @@ -82,9 +82,8 @@ void ELIST2::assign_to_sublist( //to this list last = start_it->extract_sublist (end_it); } - /*********************************************************************** - * ELIST2::length + * ELIST2::length * * Return count of elements on list **********************************************************************/ @@ -98,9 +97,8 @@ inT32 ELIST2::length() const { // count elements return count; } - /*********************************************************************** - * ELIST2::sort + * ELIST2::sort * * Sort elements on list * NB If you don't like the const declarations in the comparator, coerce yours: @@ -180,7 +178,7 @@ void ELIST2::add_sorted(int comparator(const void*, const void*), **********************************************************************/ /*********************************************************************** - * ELIST2_ITERATOR::forward + * ELIST2_ITERATOR::forward * * Move the iterator to the next element of the list. * REMEMBER: ALL LISTS ARE CIRCULAR. @@ -218,9 +216,8 @@ ELIST2_LINK *ELIST2_ITERATOR::forward() { return current; } - /*********************************************************************** - * ELIST2_ITERATOR::backward + * ELIST2_ITERATOR::backward * * Move the iterator to the previous element of the list. * REMEMBER: ALL LISTS ARE CIRCULAR. @@ -257,9 +254,8 @@ ELIST2_LINK *ELIST2_ITERATOR::backward() { return current; } - /*********************************************************************** - * ELIST2_ITERATOR::data_relative + * ELIST2_ITERATOR::data_relative * * Return the data pointer to the element "offset" elements from current. 
* (This function can't be INLINEd because it contains a loop) @@ -289,9 +285,8 @@ ELIST2_LINK *ELIST2_ITERATOR::data_relative( //get data + or - .. return ptr; } - /*********************************************************************** - * ELIST2_ITERATOR::exchange() + * ELIST2_ITERATOR::exchange() * * Given another iterator, whose current element is a different element on * the same list list OR an element of another list, exchange the two current @@ -399,9 +394,8 @@ void ELIST2_ITERATOR::exchange( //positions of 2 li other_it->current = old_current; } - /*********************************************************************** - * ELIST2_ITERATOR::extract_sublist() + * ELIST2_ITERATOR::extract_sublist() * * This is a private member, used only by ELIST2::assign_to_sublist. * Given another iterator for the same list, extract the links from THIS to @@ -445,7 +439,7 @@ ELIST2_LINK *ELIST2_ITERATOR::extract_sublist( //fr temp_it.mark_cycle_pt (); do { //walk sublist - if (temp_it.cycled_list ()) //can't find end pt + if (temp_it.cycled_list()) // can't find end pt BAD_SUBLIST.error ("ELIST2_ITERATOR.extract_sublist", ABORT, NULL); if (temp_it.at_last ()) { diff --git a/ccutil/elst2.h b/ccutil/elst2.h index 364abd86bc..bf078fbd56 100644 --- a/ccutil/elst2.h +++ b/ccutil/elst2.h @@ -46,9 +46,9 @@ i) The duplication in source does not affect the run time code size - the **********************************************************************/ /********************************************************************** - * CLASS - ELIST2_LINK + * CLASS - ELIST2_LINK * - * Generic link class for doubly linked lists with embedded links + * Generic link class for doubly linked lists with embedded links * * Note: No destructor - elements are assumed to be destroyed EITHER after * they have been extracted from a list OR by the ELIST2 destructor which @@ -68,13 +68,13 @@ class DLLSYM ELIST2_LINK prev = next = NULL; } - ELIST2_LINK( //copy constructor - const ELIST2_LINK &) { //don't 
copy link + ELIST2_LINK( // copy constructor + const ELIST2_LINK &) { // don't copy link prev = next = NULL; } - void operator= ( //don't copy links - const ELIST2_LINK &) { + void operator=( // don't copy links + const ELIST2_LINK &) { prev = next = NULL; } }; @@ -142,9 +142,10 @@ class DLLSYM ELIST2 }; /*********************************************************************** - * CLASS - ELIST2_ITERATOR + * CLASS - ELIST2_ITERATOR * - * Generic iterator class for doubly linked lists with embedded links + * Generic iterator class for doubly linked lists with embedded + *links **********************************************************************/ class DLLSYM ELIST2_ITERATOR @@ -240,8 +241,8 @@ class DLLSYM ELIST2_ITERATOR BOOL8 cycled_list(); //Completed a cycle? - void add_to_end( //add at end & - ELIST2_LINK *new_link); //don't move + void add_to_end( // add at end & + ELIST2_LINK *new_link); // don't move void exchange( //positions of 2 links ELIST2_ITERATOR *other_it); //other iterator @@ -255,7 +256,7 @@ class DLLSYM ELIST2_ITERATOR }; /*********************************************************************** - * ELIST2_ITERATOR::set_to_list + * ELIST2_ITERATOR::set_to_list * * (Re-)initialise the iterator to point to the start of the list_to_iterate * over. 
@@ -279,9 +280,8 @@ inline void ELIST2_ITERATOR::set_to_list( //change list ex_current_was_cycle_pt = FALSE; } - /*********************************************************************** - * ELIST2_ITERATOR::ELIST2_ITERATOR + * ELIST2_ITERATOR::ELIST2_ITERATOR * * CONSTRUCTOR - set iterator to specified list; **********************************************************************/ @@ -290,9 +290,8 @@ inline ELIST2_ITERATOR::ELIST2_ITERATOR(ELIST2 *list_to_iterate) { set_to_list(list_to_iterate); } - /*********************************************************************** - * ELIST2_ITERATOR::add_after_then_move + * ELIST2_ITERATOR::add_after_then_move * * Add a new element to the list after the current element and move the * iterator to the new element. @@ -339,9 +338,8 @@ inline void ELIST2_ITERATOR::add_after_then_move( // element to add current = new_element; } - /*********************************************************************** - * ELIST2_ITERATOR::add_after_stay_put + * ELIST2_ITERATOR::add_after_stay_put * * Add a new element to the list after the current element but do not move * the iterator to the new element. @@ -391,9 +389,8 @@ inline void ELIST2_ITERATOR::add_after_stay_put( // element to add } } - /*********************************************************************** - * ELIST2_ITERATOR::add_before_then_move + * ELIST2_ITERATOR::add_before_then_move * * Add a new element to the list before the current element and move the * iterator to the new element. @@ -438,9 +435,8 @@ inline void ELIST2_ITERATOR::add_before_then_move( // element to add current = new_element; } - /*********************************************************************** - * ELIST2_ITERATOR::add_before_stay_put + * ELIST2_ITERATOR::add_before_stay_put * * Add a new element to the list before the current element but don't move the * iterator to the new element. 
@@ -486,11 +482,11 @@ inline void ELIST2_ITERATOR::add_before_stay_put( // element to add } } - /*********************************************************************** - * ELIST2_ITERATOR::add_list_after + * ELIST2_ITERATOR::add_list_after * - * Insert another list to this list after the current element but don't move the + * Insert another list to this list after the current element but don't move + *the * iterator. **********************************************************************/ @@ -537,9 +533,8 @@ inline void ELIST2_ITERATOR::add_list_after(ELIST2 *list_to_add) { } } - /*********************************************************************** - * ELIST2_ITERATOR::add_list_before + * ELIST2_ITERATOR::add_list_before * * Insert another list to this list before the current element. Move the * iterator to the start of the inserted elements @@ -586,9 +581,8 @@ inline void ELIST2_ITERATOR::add_list_before(ELIST2 *list_to_add) { } } - /*********************************************************************** - * ELIST2_ITERATOR::extract + * ELIST2_ITERATOR::extract * * Do extraction by removing current from the list, returning it to the * caller, but NOT updating the iterator. (So that any calling loop can do @@ -631,9 +625,8 @@ inline ELIST2_LINK *ELIST2_ITERATOR::extract() { return extracted_link; } - /*********************************************************************** - * ELIST2_ITERATOR::move_to_first() + * ELIST2_ITERATOR::move_to_first() * * Move current so that it is set to the start of the list. * Return data just in case anyone wants it. @@ -651,9 +644,8 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_first() { return current; } - /*********************************************************************** - * ELIST2_ITERATOR::move_to_last() + * ELIST2_ITERATOR::move_to_last() * * Move current so that it is set to the end of the list. * Return data just in case anyone wants it. 
@@ -671,9 +663,8 @@ inline ELIST2_LINK *ELIST2_ITERATOR::move_to_last() { return current; } - /*********************************************************************** - * ELIST2_ITERATOR::mark_cycle_pt() + * ELIST2_ITERATOR::mark_cycle_pt() * * Remember the current location so that we can tell whether we've returned * to this point later. @@ -696,9 +687,8 @@ inline void ELIST2_ITERATOR::mark_cycle_pt() { started_cycling = FALSE; } - /*********************************************************************** - * ELIST2_ITERATOR::at_first() + * ELIST2_ITERATOR::at_first() * * Are we at the start of the list? * @@ -716,9 +706,8 @@ inline BOOL8 ELIST2_ITERATOR::at_first() { !ex_current_was_last)); //first and last } - /*********************************************************************** - * ELIST2_ITERATOR::at_last() + * ELIST2_ITERATOR::at_last() * * Are we at the end of the list? * @@ -736,9 +725,8 @@ inline BOOL8 ELIST2_ITERATOR::at_last() { ex_current_was_last)); //first and last } - /*********************************************************************** - * ELIST2_ITERATOR::cycled_list() + * ELIST2_ITERATOR::cycled_list() * * Have we returned to the cycle_pt since it was set? * @@ -754,9 +742,8 @@ inline BOOL8 ELIST2_ITERATOR::cycled_list() { } - /*********************************************************************** - * ELIST2_ITERATOR::length() + * ELIST2_ITERATOR::length() * * Return the length of the list * @@ -771,9 +758,8 @@ inline inT32 ELIST2_ITERATOR::length() { return list->length (); } - /*********************************************************************** - * ELIST2_ITERATOR::sort() + * ELIST2_ITERATOR::sort() * * Sort the elements of the list, then reposition at the start. 
* @@ -792,9 +778,8 @@ const void *, const void *)) { move_to_first(); } - /*********************************************************************** - * ELIST2_ITERATOR::add_to_end + * ELIST2_ITERATOR::add_to_end * * Add a new element to the end of the list without moving the iterator. * This is provided because a single linked list cannot move to the last as @@ -854,7 +839,7 @@ will NOT work correctly for classes derived from this. The macro generates: - An element deletion function: CLASSNAME##_zapper - - An E_LIST2 subclass: CLASSNAME##_LIST + - An E_LIST2 subclass: CLASSNAME##_LIST - An E_LIST2_ITERATOR subclass: CLASSNAME##_IT @@ -873,132 +858,132 @@ ELIST2IZEH is a concatenation of 3 fragments ELIST2IZEH_A, ELIST2IZEH_B and ELIST2IZEH_C. ***********************************************************************/ -#define ELIST2IZEH_A( CLASSNAME ) \ - \ -extern DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ -ELIST2_LINK* link); /*link to delete*/ - -#define ELIST2IZEH_B( CLASSNAME ) \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_LIST \ -* \ -* List class for class CLASSNAME \ -* \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_LIST : public ELIST2 \ -{ \ -public: \ - CLASSNAME##_LIST():ELIST2() {} \ - /* constructor */ \ - \ - CLASSNAME##_LIST( /* don't construct */ \ - const CLASSNAME##_LIST&) /*by initial assign*/\ - { DONT_CONSTRUCT_LIST_BY_COPY.error( QUOTE_IT( CLASSNAME##_LIST ), \ - ABORT, NULL ); } \ - \ -void clear() /* delete elements */\ - { ELIST2::internal_clear( &CLASSNAME##_zapper ); } \ - \ - ~CLASSNAME##_LIST() /* destructor */ \ - { clear(); } \ -\ -/* Become a deep copy of src_list*/ \ -void deep_copy(const CLASSNAME##_LIST* src_list, \ - CLASSNAME* (*copier)(const CLASSNAME*)); \ -\ -void operator=( /* prevent assign */ \ - const CLASSNAME##_LIST&) \ - { DONT_ASSIGN_LISTS.error( QUOTE_IT( CLASSNAME##_LIST ), \ - ABORT, NULL ); 
} - -#define ELIST2IZEH_C( CLASSNAME ) \ -}; \ - \ - \ - \ -/*********************************************************************** \ -* CLASS - CLASSNAME##_IT \ -* \ -* Iterator class for class CLASSNAME##_LIST \ -* \ -* Note: We don't need to coerce pointers to member functions input \ -* parameters as these are automatically converted to the type of the base \ -* type. ("A ptr to a class may be converted to a pointer to a public base \ -* class of that class") \ -**********************************************************************/ \ - \ -class DLLSYM CLASSNAME##_IT : public ELIST2_ITERATOR \ -{ \ -public: \ - CLASSNAME##_IT():ELIST2_ITERATOR(){} \ - \ - CLASSNAME##_IT( \ -CLASSNAME##_LIST* list):ELIST2_ITERATOR(list){} \ - \ - CLASSNAME* data() \ - { return (CLASSNAME*) ELIST2_ITERATOR::data(); } \ - \ - CLASSNAME* data_relative( \ - inT8 offset) \ - { return (CLASSNAME*) ELIST2_ITERATOR::data_relative( offset ); } \ - \ - CLASSNAME* forward() \ - { return (CLASSNAME*) ELIST2_ITERATOR::forward(); } \ - \ - CLASSNAME* backward() \ - { return (CLASSNAME*) ELIST2_ITERATOR::backward(); } \ - \ - CLASSNAME* extract() \ - { return (CLASSNAME*) ELIST2_ITERATOR::extract(); } \ - \ - CLASSNAME* move_to_first() \ - { return (CLASSNAME*) ELIST2_ITERATOR::move_to_first(); } \ - \ - CLASSNAME* move_to_last() \ - { return (CLASSNAME*) ELIST2_ITERATOR::move_to_last(); } \ -}; - -#define ELIST2IZEH( CLASSNAME ) \ - \ -ELIST2IZEH_A( CLASSNAME ) \ - \ -ELIST2IZEH_B( CLASSNAME ) \ - \ -ELIST2IZEH_C( CLASSNAME ) +#define ELIST2IZEH_A(CLASSNAME) \ + \ + extern DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ + ELIST2_LINK *link); /*link to delete*/ + +#define ELIST2IZEH_B(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASS - \ + *CLASSNAME##_LIST \ + * \ + * List class for class \ + *CLASSNAME \ + * \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_LIST : public 
ELIST2 { \ + public: \ + CLASSNAME##_LIST() : ELIST2() {} \ + /* constructor */ \ + \ + CLASSNAME##_LIST( /* don't construct */ \ + const CLASSNAME##_LIST &) /*by initial assign*/ \ + { \ + DONT_CONSTRUCT_LIST_BY_COPY.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, \ + NULL); \ + } \ + \ + void clear() /* delete elements */ \ + { \ + ELIST2::internal_clear(&CLASSNAME##_zapper); \ + } \ + \ + ~CLASSNAME##_LIST() /* destructor */ \ + { \ + clear(); \ + } \ + \ + /* Become a deep copy of src_list*/ \ + void deep_copy(const CLASSNAME##_LIST *src_list, \ + CLASSNAME *(*copier)(const CLASSNAME *)); \ + \ + void operator=(/* prevent assign */ \ + const CLASSNAME##_LIST &) { \ + DONT_ASSIGN_LISTS.error(QUOTE_IT(CLASSNAME##_LIST), ABORT, NULL); \ + } +#define ELIST2IZEH_C(CLASSNAME) \ + } \ + ; \ + \ + /*********************************************************************** \ + * CLASS - CLASSNAME##_IT \ + * \ + * Iterator class for class CLASSNAME##_LIST \ + * \ + * Note: We don't need to coerce pointers to member functions input \ + * parameters as these are automatically converted to the type of the base \ + * type. 
("A ptr to a class may be converted to a pointer to a public base \ + * class of that class") \ + **********************************************************************/ \ + \ + class DLLSYM CLASSNAME##_IT : public ELIST2_ITERATOR { \ + public: \ + CLASSNAME##_IT() : ELIST2_ITERATOR() {} \ + \ + CLASSNAME##_IT(CLASSNAME##_LIST *list) : ELIST2_ITERATOR(list) {} \ + \ + CLASSNAME *data() { return (CLASSNAME *)ELIST2_ITERATOR::data(); } \ + \ + CLASSNAME *data_relative(inT8 offset) { \ + return (CLASSNAME *)ELIST2_ITERATOR::data_relative(offset); \ + } \ + \ + CLASSNAME *forward() { return (CLASSNAME *)ELIST2_ITERATOR::forward(); } \ + \ + CLASSNAME *backward() { return (CLASSNAME *)ELIST2_ITERATOR::backward(); } \ + \ + CLASSNAME *extract() { return (CLASSNAME *)ELIST2_ITERATOR::extract(); } \ + \ + CLASSNAME *move_to_first() { \ + return (CLASSNAME *)ELIST2_ITERATOR::move_to_first(); \ + } \ + \ + CLASSNAME *move_to_last() { \ + return (CLASSNAME *)ELIST2_ITERATOR::move_to_last(); \ + } \ + }; + +#define ELIST2IZEH(CLASSNAME) \ + \ + ELIST2IZEH_A(CLASSNAME) \ + \ + ELIST2IZEH_B(CLASSNAME) \ + \ + ELIST2IZEH_C(CLASSNAME) /*********************************************************************** ELIST2IZE( CLASSNAME ) MACRO ***********************************************************************/ -#define ELIST2IZE( CLASSNAME ) \ - \ -/*********************************************************************** \ -* CLASSNAME##_zapper \ -* \ -* A function which can delete a CLASSNAME element. This is passed to the \ -* generic clear list member function so that when a list is cleared the \ -* elements on the list are properly destroyed from the base class, even \ -* though we don't use a virtual destructor function. 
\ -**********************************************************************/ \ - \ -DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ -ELIST2_LINK* link) /*link to delete*/ \ -{ \ -delete (CLASSNAME *) link; \ -} \ -\ -/* Become a deep copy of src_list*/ \ -void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST* src_list, \ - CLASSNAME* (*copier)(const CLASSNAME*)) { \ -\ - CLASSNAME##_IT from_it(const_cast<CLASSNAME##_LIST*>(src_list)); \ - CLASSNAME##_IT to_it(this); \ -\ - for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ - to_it.add_after_then_move((*copier)(from_it.data())); \ -} +#define ELIST2IZE(CLASSNAME) \ + \ + /*********************************************************************** \ + * CLASSNAME##_zapper \ + * \ + * A function which can delete a CLASSNAME element. This is passed to the \ + * generic clear list member function so that when a list is cleared the \ + * elements on the list are properly destroyed from the base class, even \ + * though we don't use a virtual destructor function. \ + **********************************************************************/ \ + \ + DLLSYM void CLASSNAME##_zapper( /*delete a link*/ \ + ELIST2_LINK *link) /*link to delete*/ \ + { \ + delete (CLASSNAME *)link; \ + } \ + \ + /* Become a deep copy of src_list*/ \ + void CLASSNAME##_LIST::deep_copy(const CLASSNAME##_LIST *src_list, \ + CLASSNAME *(*copier)(const CLASSNAME *)) { \ + CLASSNAME##_IT from_it(const_cast<CLASSNAME##_LIST *>(src_list)); \ + CLASSNAME##_IT to_it(this); \ + \ + for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) \ + to_it.add_after_then_move((*copier)(from_it.data())); \ + } #endif diff --git a/ccutil/errcode.h b/ccutil/errcode.h index d690240036..2f31a7b9ae 100644 --- a/ccutil/errcode.h +++ b/ccutil/errcode.h @@ -87,11 +87,10 @@ const ERRCODE ASSERT_FAILED = "Assert failed"; __FILE__, __LINE__); \ } -#define ASSERT_HOST_MSG(x, ...) 
if (!(x)) \ - { \ - tprintf(__VA_ARGS__); \ - ASSERT_FAILED.error(#x, ABORT, "in file %s, line %d", \ - __FILE__, __LINE__); \ +#define ASSERT_HOST_MSG(x, ...) \ + if (!(x)) { \ + tprintf(__VA_ARGS__); \ + ASSERT_FAILED.error(#x, ABORT, "in file %s, line %d", __FILE__, __LINE__); \ } void signal_exit(int signal_code); diff --git a/ccutil/lsterr.h b/ccutil/lsterr.h index 42ed07e326..82497ae80d 100644 --- a/ccutil/lsterr.h +++ b/ccutil/lsterr.h @@ -38,6 +38,6 @@ const ERRCODE NULL_PREV = "Previous element on the list is NULL"; const ERRCODE EMPTY_LIST = "List is empty"; const ERRCODE BAD_PARAMETER = "List parameter error"; const ERRCODE STILL_LINKED = -"Attempting to add an element with non NULL links, to a list"; + "Attempting to add an element with non NULL links, to a list"; #endif #endif diff --git a/ccutil/mainblk.cpp b/ccutil/mainblk.cpp index aa73c55002..36becce9ec 100644 --- a/ccutil/mainblk.cpp +++ b/ccutil/mainblk.cpp @@ -74,7 +74,7 @@ void CCUtil::main_setup(const char *argv0, const char *basename) { #endif /* _WIN32 */ #if defined(TESSDATA_PREFIX) } else { - /* Use tessdata prefix which was compiled in. */ +/* Use tessdata prefix which was compiled in. */ #define _STR(a) #a #define _XSTR(a) _STR(a) datadir = _XSTR(TESSDATA_PREFIX); diff --git a/ccutil/ocrclass.h b/ccutil/ocrclass.h index 3175a6d29a..f352956fa0 100644 --- a/ccutil/ocrclass.h +++ b/ccutil/ocrclass.h @@ -1,7 +1,7 @@ /********************************************************************** * File: ocrclass.h * Description: Class definitions and constants for the OCR API. - * Author: Hewlett-Packard Co + * Author: Hewlett-Packard Co * * (C) Copyright 1996, Hewlett-Packard Co. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -110,28 +110,35 @@ typedef struct { /*single character */ * user words found. If it returns true then operation is cancelled. 
**********************************************************************/ typedef bool (*CANCEL_FUNC)(void* cancel_this, int words); -typedef bool (*PROGRESS_FUNC)(int progress, - int left, int right, int top, int bottom); +typedef bool (*PROGRESS_FUNC)(int progress, int left, int right, int top, + int bottom); class ETEXT_DESC { // output header public: - inT16 count; /// chars in this buffer(0) - inT16 progress; /// percent complete increasing (0-100) + inT16 count; /// chars in this buffer(0) + inT16 progress; /// percent complete increasing (0-100) /** Progress monitor covers word recognition and it does not cover layout * analysis. * See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27 */ - inT8 more_to_come; /// true if not last - volatile inT8 ocr_alive; /// ocr sets to 1, HP 0 - inT8 err_code; /// for errcode use - CANCEL_FUNC cancel; /// returns true to cancel - PROGRESS_FUNC progress_callback; /// called whenever progress increases - void* cancel_this; /// this or other data for cancel - struct timeval end_time; /** time to stop. expected to be set only by call - * to set_deadline_msecs() */ - EANYCODE_CHAR text[1]; /// character data + inT8 more_to_come; /// true if not last + volatile inT8 ocr_alive; /// ocr sets to 1, HP 0 + inT8 err_code; /// for errcode use + CANCEL_FUNC cancel; /// returns true to cancel + PROGRESS_FUNC progress_callback; /// called whenever progress increases + void* cancel_this; /// this or other data for cancel + struct timeval end_time; /// Time to stop. Expected to be set only + /// by call to set_deadline_msecs(). 
+ EANYCODE_CHAR text[1]; /// character data - ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0), - err_code(0), cancel(NULL), cancel_this(NULL) { + ETEXT_DESC() + : count(0), + progress(0), + more_to_come(0), + ocr_alive(0), + err_code(0), + cancel(NULL), + progress_callback(NULL), + cancel_this(NULL) { end_time.tv_sec = 0; end_time.tv_usec = 0; } diff --git a/ccutil/params.cpp b/ccutil/params.cpp index 9b604fbef4..c8dd3514e5 100644 --- a/ccutil/params.cpp +++ b/ccutil/params.cpp @@ -31,8 +31,7 @@ #define EQUAL '=' tesseract::ParamsVectors *GlobalParams() { - static tesseract::ParamsVectors global_params = - tesseract::ParamsVectors(); + static tesseract::ParamsVectors global_params = tesseract::ParamsVectors(); return &global_params; } diff --git a/ccutil/strngs.cpp b/ccutil/strngs.cpp index ff3bbac287..0760852e90 100644 --- a/ccutil/strngs.cpp +++ b/ccutil/strngs.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: strngs.c (Formerly strings.c) * Description: STRING class functions. - * Author: Ray Smith - * Created: Fri Feb 15 09:13:30 GMT 1991 + * Author: Ray Smith + * Created: Fri Feb 15 09:13:30 GMT 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/ccutil/tessdatamanager.h b/ccutil/tessdatamanager.h index fd2685a1d8..e583b70049 100644 --- a/ccutil/tessdatamanager.h +++ b/ccutil/tessdatamanager.h @@ -76,24 +76,24 @@ enum TessdataType { * kTessdataFileSuffixes[i] indicates the file suffix for * tessdata of type i (from TessdataType enum). 
*/ -static const char * const kTessdataFileSuffixes[] = { - kLangConfigFileSuffix, // 0 - kUnicharsetFileSuffix, // 1 - kAmbigsFileSuffix, // 2 - kBuiltInTemplatesFileSuffix, // 3 - kBuiltInCutoffsFileSuffix, // 4 - kNormProtoFileSuffix, // 5 - kPuncDawgFileSuffix, // 6 - kSystemDawgFileSuffix, // 7 - kNumberDawgFileSuffix, // 8 - kFreqDawgFileSuffix, // 9 - kFixedLengthDawgsFileSuffix, // 10 // deprecated - kCubeUnicharsetFileSuffix, // 11 - kCubeSystemDawgFileSuffix, // 12 - kShapeTableFileSuffix, // 13 - kBigramDawgFileSuffix, // 14 - kUnambigDawgFileSuffix, // 15 - kParamsModelFileSuffix, // 16 +static const char *const kTessdataFileSuffixes[] = { + kLangConfigFileSuffix, // 0 + kUnicharsetFileSuffix, // 1 + kAmbigsFileSuffix, // 2 + kBuiltInTemplatesFileSuffix, // 3 + kBuiltInCutoffsFileSuffix, // 4 + kNormProtoFileSuffix, // 5 + kPuncDawgFileSuffix, // 6 + kSystemDawgFileSuffix, // 7 + kNumberDawgFileSuffix, // 8 + kFreqDawgFileSuffix, // 9 + kFixedLengthDawgsFileSuffix, // 10 // deprecated + kCubeUnicharsetFileSuffix, // 11 + kCubeSystemDawgFileSuffix, // 12 + kShapeTableFileSuffix, // 13 + kBigramDawgFileSuffix, // 14 + kUnambigDawgFileSuffix, // 15 + kParamsModelFileSuffix, // 16 }; /** @@ -101,23 +101,23 @@ static const char * const kTessdataFileSuffixes[] = { * of type i (from TessdataType enum) is text, and is binary otherwise. 
*/ static const bool kTessdataFileIsText[] = { - true, // 0 - true, // 1 - true, // 2 - false, // 3 - true, // 4 - true, // 5 - false, // 6 - false, // 7 - false, // 8 - false, // 9 - false, // 10 // deprecated - true, // 11 - false, // 12 - false, // 13 - false, // 14 - false, // 15 - true, // 16 + true, // 0 + true, // 1 + true, // 2 + false, // 3 + true, // 4 + true, // 5 + false, // 6 + false, // 7 + false, // 8 + false, // 9 + false, // 10 // deprecated + true, // 11 + false, // 12 + false, // 13 + false, // 14 + false, // 15 + true, // 16 }; /** diff --git a/ccutil/unicharset.h b/ccutil/unicharset.h index 684655affb..023e84d5b6 100644 --- a/ccutil/unicharset.h +++ b/ccutil/unicharset.h @@ -181,8 +181,7 @@ class UNICHARSET { // Return the UNICHAR_ID of a given unichar representation within the // UNICHARSET. Only the first length characters from unichar_repr are used. - UNICHAR_ID unichar_to_id(const char* const unichar_repr, - int length) const; + UNICHAR_ID unichar_to_id(const char* const unichar_repr, int length) const; // Return the minimum number of bytes that matches a legal UNICHAR_ID, // while leaving the rest of the string encodable. 
Returns 0 if the diff --git a/classify/classify.cpp b/classify/classify.cpp index 436efd1f2d..7c11c51f6e 100644 --- a/classify/classify.cpp +++ b/classify/classify.cpp @@ -151,8 +151,8 @@ Classify::Classify() INT_MEMBER(classify_integer_matcher_multiplier, 10, "Integer Matcher Multiplier 0-255: ", this->params()), EnableLearning(true), - INT_MEMBER(il1_adaption_test, 0, "Don't adapt to i/I at beginning of word", - this->params()), + INT_MEMBER(il1_adaption_test, 0, + "Don't adapt to i/I at beginning of word", this->params()), BOOL_MEMBER(classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].", this->params()), double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size", diff --git a/classify/cluster.cpp b/classify/cluster.cpp index b723bfa82e..1f82349552 100644 --- a/classify/cluster.cpp +++ b/classify/cluster.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: cluster.c - ** Purpose: Routines for clustering points in N-D space - ** Author: Dan Johnson - ** History: 5/29/89, DSJ, Created. + ** Filename: cluster.c + ** Purpose: Routines for clustering points in N-D space + ** Author: Dan Johnson + ** History: 5/29/89, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -390,11 +390,11 @@ double InvertMatrix(const float* input, int size, float* inv); * This routine creates a new clusterer data structure, * initializes it, and returns a pointer to it. * - * @param SampleSize number of dimensions in feature space - * @param ParamDesc description of each dimension - * @return pointer to the new clusterer data structure - * @note Exceptions: None - * @note History: 5/29/89, DSJ, Created. 
+ * @param SampleSize number of dimensions in feature space + * @param ParamDesc description of each dimension + * @return pointer to the new clusterer data structure + * @note Exceptions: None + * @note History: 5/29/89, DSJ, Created. */ CLUSTERER * MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) { @@ -437,7 +437,6 @@ MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) { return Clusterer; } // MakeClusterer - /** * This routine creates a new sample data structure to hold * the specified feature. This sample is added to the clusterer @@ -445,14 +444,14 @@ MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]) { * clustered later), and a pointer to the sample is returned to * the caller. * - * @param Clusterer clusterer data structure to add sample to - * @param Feature feature to be added to clusterer - * @param CharID unique ident. of char that sample came from + * @param Clusterer clusterer data structure to add sample to + * @param Feature feature to be added to clusterer + * @param CharID unique ident. of char that sample came from * - * @return Pointer to the new sample data structure - * @note Exceptions: ALREADYCLUSTERED MakeSample can't be called after + * @return Pointer to the new sample data structure + * @note Exceptions: ALREADYCLUSTERED MakeSample can't be called after * ClusterSamples has been called - * @note History: 5/29/89, DSJ, Created. + * @note History: 5/29/89, DSJ, Created. */ SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, inT32 CharID) { @@ -490,7 +489,6 @@ SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, return (Sample); } // MakeSample - /** * This routine first checks to see if the samples in this * clusterer have already been clustered before; if so, it does @@ -505,12 +503,12 @@ SAMPLE* MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, * list of prototypes that best represent the samples given * the constraints specified in Config. 
* - * @param Clusterer data struct containing samples to be clustered - * @param Config parameters which control clustering process + * @param Clusterer data struct containing samples to be clustered + * @param Config parameters which control clustering process * * @return Pointer to a list of prototypes - * @note Exceptions: None - * @note History: 5/29/89, DSJ, Created. + * @note Exceptions: None + * @note History: 5/29/89, DSJ, Created. */ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { //only create cluster tree if samples have never been clustered before @@ -523,10 +521,16 @@ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { //compute prototypes starting at the root node in the tree ComputePrototypes(Clusterer, Config); - return (Clusterer->ProtoList); + // We don't need the cluster pointers in the protos any more, so null them + // out, which makes it safe to delete the clusterer. + LIST proto_list = Clusterer->ProtoList; + iterate(proto_list) { + PROTOTYPE *proto = reinterpret_cast<PROTOTYPE *>(first_node(proto_list)); + proto->Cluster = NULL; + } + return Clusterer->ProtoList; } // ClusterSamples - /** * This routine frees all of the memory allocated to the * specified data structure. It will not, however, free @@ -535,10 +539,10 @@ LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { * to NULL to indicate that the cluster data structures no * longer exist. Any sample lists that have been obtained * via calls to GetSamples are no longer valid. - * @param Clusterer pointer to data structure to be freed + * @param Clusterer pointer to data structure to be freed * @return None - * @note Exceptions: None - * @note History: 6/6/89, DSJ, Created. + * @note Exceptions: None + * @note History: 6/6/89, DSJ, Created. 
*/ void FreeClusterer(CLUSTERER *Clusterer) { if (Clusterer != NULL) { @@ -558,21 +562,19 @@ void FreeClusterer(CLUSTERER *Clusterer) { } } // FreeClusterer - /** * This routine frees all of the memory allocated to the * specified list of prototypes. The clusters which are * pointed to by the prototypes are not freed. - * @param ProtoList pointer to list of prototypes to be freed + * @param ProtoList pointer to list of prototypes to be freed * @return None - * @note Exceptions: None - * @note History: 6/6/89, DSJ, Created. + * @note Exceptions: None + * @note History: 6/6/89, DSJ, Created. */ void FreeProtoList(LIST *ProtoList) { destroy_nodes(*ProtoList, FreePrototype); } // FreeProtoList - /** * This routine deallocates the memory consumed by the specified * prototype and modifies the corresponding cluster so that it @@ -606,7 +608,6 @@ void FreePrototype(void *arg) { //PROTOTYPE *Prototype) memfree(Prototype); } // FreePrototype - /** * This routine is used to find all of the samples which * belong to a cluster. It starts by removing the top @@ -617,10 +618,10 @@ void FreePrototype(void *arg) { //PROTOTYPE *Prototype) * If all samples have been found, NULL is returned. * InitSampleSearch() must be called * before NextSample() to initialize the search. - * @param SearchState ptr to list containing clusters to be searched - * @return Pointer to the next leaf cluster (sample) or NULL. - * @note Exceptions: None - * @note History: 6/16/89, DSJ, Created. + * @param SearchState ptr to list containing clusters to be searched + * @return Pointer to the next leaf cluster (sample) or NULL. + * @note Exceptions: None + * @note History: 6/16/89, DSJ, Created. */ CLUSTER *NextSample(LIST *SearchState) { CLUSTER *Cluster; @@ -637,29 +638,27 @@ CLUSTER *NextSample(LIST *SearchState) { } } // NextSample - /** * This routine returns the mean of the specified * prototype in the indicated dimension. 
- * @param Proto prototype to return mean of - * @param Dimension dimension whose mean is to be returned - * @return Mean of Prototype in Dimension + * @param Proto prototype to return mean of + * @param Dimension dimension whose mean is to be returned + * @return Mean of Prototype in Dimension * @note Exceptions: none - * @note History: 7/6/89, DSJ, Created. + * @note History: 7/6/89, DSJ, Created. */ FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension) { return (Proto->Mean[Dimension]); } // Mean - /** * This routine returns the standard deviation of the * prototype in the indicated dimension. - * @param Proto prototype to return standard deviation of - * @param Dimension dimension whose stddev is to be returned - * @return Standard deviation of Prototype in Dimension + * @param Proto prototype to return standard deviation of + * @param Dimension dimension whose stddev is to be returned + * @return Standard deviation of Prototype in Dimension * @note Exceptions: none - * @note History: 7/6/89, DSJ, Created. + * @note History: 7/6/89, DSJ, Created. */ FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension) { switch (Proto->Style) { @@ -697,10 +696,10 @@ FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension) { * tree are the individual samples themselves; they have no * sub-clusters. The root node of the tree conceptually contains * all of the samples. - * @param Clusterer data structure holdings samples to be clustered - * @return None (the Clusterer data structure is changed) - * @note Exceptions: None - * @note History: 5/29/89, DSJ, Created. + * @param Clusterer data structure holdings samples to be clustered + * @return None (the Clusterer data structure is changed) + * @note Exceptions: None + * @note History: 5/29/89, DSJ, Created. 
*/ void CreateClusterTree(CLUSTERER *Clusterer) { ClusteringContext context; @@ -760,7 +759,6 @@ void CreateClusterTree(CLUSTERER *Clusterer) { memfree(context.candidates); } // CreateClusterTree - /** * This routine is designed to be used in concert with the * KDWalk routine. It will create a potential cluster for @@ -786,7 +784,6 @@ void MakePotentialClusters(ClusteringContext *context, } } // MakePotentialClusters - /** * This routine searches the specified kd-tree for the nearest * neighbor of the specified cluster. It actually uses the @@ -795,12 +792,12 @@ void MakePotentialClusters(ClusteringContext *context, * neighbor is returned, if it can be found, otherwise NULL is * returned. The distance between the 2 nodes is placed * in the specified variable. - * @param Tree kd-tree to search in for nearest neighbor - * @param Cluster cluster whose nearest neighbor is to be found - * @param Distance ptr to variable to report distance found - * @return Pointer to the nearest neighbor of Cluster, or NULL + * @param Tree kd-tree to search in for nearest neighbor + * @param Cluster cluster whose nearest neighbor is to be found + * @param Distance ptr to variable to report distance found + * @return Pointer to the nearest neighbor of Cluster, or NULL * @note Exceptions: none - * @note History: 5/29/89, DSJ, Created. + * @note History: 5/29/89, DSJ, Created. * 7/13/89, DSJ, Removed visibility of kd-tree node data struct */ CLUSTER * @@ -830,17 +827,16 @@ FindNearestNeighbor(KDTREE * Tree, CLUSTER * Cluster, FLOAT32 * Distance) return BestNeighbor; } // FindNearestNeighbor - /** * This routine creates a new permanent cluster from the * clusters specified in TempCluster. The 2 clusters in * TempCluster are marked as "clustered" and deleted from * the kd-tree. The new cluster is then added to the kd-tree. 
- * @param Clusterer current clustering environment - * @param TempCluster potential cluster to make permanent + * @param Clusterer current clustering environment + * @param TempCluster potential cluster to make permanent * @return Pointer to the new permanent cluster - * @note Exceptions: none - * @note History: 5/29/89, DSJ, Created. + * @note Exceptions: none + * @note History: 5/29/89, DSJ, Created. * 7/13/89, DSJ, Removed visibility of kd-tree node data struct */ CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) { @@ -872,21 +868,20 @@ CLUSTER *MakeNewCluster(CLUSTERER *Clusterer, TEMPCLUSTER *TempCluster) { return Cluster; } // MakeNewCluster - /** * This routine merges two clusters into one larger cluster. * To do this it computes the number of samples in the new * cluster and the mean of the new cluster. The ParamDesc * information is used to ensure that circular dimensions * are handled correctly. - * @param N # of dimensions (size of arrays) - * @param ParamDesc array of dimension descriptions - * @param n1, n2 number of samples in each old cluster - * @param m array to hold mean of new cluster - * @param m1, m2 arrays containing means of old clusters - * @return The number of samples in the new cluster. - * @note Exceptions: None - * @note History: 5/31/89, DSJ, Created. + * @param N # of dimensions (size of arrays) + * @param ParamDesc array of dimension descriptions + * @param n1, n2 number of samples in each old cluster + * @param m array to hold mean of new cluster + * @param m1, m2 arrays containing means of old clusters + * @return The number of samples in the new cluster. + * @note Exceptions: None + * @note History: 5/31/89, DSJ, Created. 
*/ inT32 MergeClusters(inT16 N, PARAM_DESC ParamDesc[], @@ -921,17 +916,16 @@ inT32 MergeClusters(inT16 N, return n; } // MergeClusters - /** * This routine decides which clusters in the cluster tree * should be represented by prototypes, forms a list of these * prototypes, and places the list in the Clusterer data * structure. - * @param Clusterer data structure holding cluster tree - * @param Config parameters used to control prototype generation - * @return None - * @note Exceptions: None - * @note History: 5/30/89, DSJ, Created. + * @param Clusterer data structure holding cluster tree + * @param Config parameters used to control prototype generation + * @return None + * @note Exceptions: None + * @note History: 5/30/89, DSJ, Created. */ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { LIST ClusterStack = NIL_LIST; @@ -961,8 +955,7 @@ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { } } // ComputePrototypes - -/** +/** * This routine attempts to create a prototype from the * specified cluster that conforms to the distribution * specified in Config. If there are too few samples in the @@ -972,12 +965,12 @@ void ComputePrototypes(CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { * is generated and NULL is returned. If a prototype can be * found that matches the desired distribution then a pointer * to it is returned, otherwise NULL is returned. - * @param Clusterer data structure holding cluster tree - * @param Config parameters used to control prototype generation - * @param Cluster cluster to be made into a prototype - * @return Pointer to new prototype or NULL - * @note Exceptions: None - * @note History: 6/19/89, DSJ, Created. + * @param Clusterer data structure holding cluster tree + * @param Config parameters used to control prototype generation + * @param Cluster cluster to be made into a prototype + * @return Pointer to new prototype or NULL + * @note Exceptions: None + * @note History: 6/19/89, DSJ, Created. 
*/ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, CLUSTERCONFIG *Config, @@ -1050,7 +1043,6 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, return Proto; } // MakePrototype - /** * This routine checks for clusters which are degenerate and * therefore cannot be analyzed in a statistically valid way. @@ -1063,14 +1055,14 @@ PROTOTYPE *MakePrototype(CLUSTERER *Clusterer, * * If the cluster is not degenerate, NULL is returned. * - * @param N number of dimensions - * @param Cluster cluster being analyzed - * @param Statistics statistical info about cluster - * @param Style type of prototype to be generated - * @param MinSamples minimum number of samples in a cluster - * @return Pointer to degenerate prototype or NULL. - * @note Exceptions: None - * @note History: 6/20/89, DSJ, Created. + * @param N number of dimensions + * @param Cluster cluster being analyzed + * @param Statistics statistical info about cluster + * @param Style type of prototype to be generated + * @param MinSamples minimum number of samples in a cluster + * @return Pointer to degenerate prototype or NULL. + * @note Exceptions: None + * @note History: 6/20/89, DSJ, Created. * 7/12/89, DSJ, Changed name and added check for 0 stddev. * 8/8/89, DSJ, Removed check for 0 stddev (handled elsewhere). */ @@ -1110,10 +1102,10 @@ PROTOTYPE *MakeDegenerateProto( //this was MinSample * be split. If not, then a new prototype is formed and * returned to the caller. If there is, then NULL is returned * to the caller. 
- * @param Clusterer data struct containing samples being clustered + * @param Clusterer data struct containing samples being clustered * @param Config provides the magic number of samples that make a good cluster - * @param Cluster cluster to be made into an elliptical prototype - * @param Statistics statistical info about cluster + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about cluster * @return Pointer to new elliptical prototype or NULL. */ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, @@ -1215,13 +1207,13 @@ PROTOTYPE *TestEllipticalProto(CLUSTERER *Clusterer, * be approximated by a spherical normal distribution. If it * can be, then a new prototype is formed and returned to the * caller. If it can't be, then NULL is returned to the caller. - * @param Clusterer data struct containing samples being clustered - * @param Cluster cluster to be made into a spherical prototype - * @param Statistics statistical info about cluster - * @param Buckets histogram struct used to analyze distribution - * @return Pointer to new spherical prototype or NULL. - * @note Exceptions: None - * @note History: 6/1/89, DSJ, Created. + * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into a spherical prototype + * @param Statistics statistical info about cluster + * @param Buckets histogram struct used to analyze distribution + * @return Pointer to new spherical prototype or NULL. + * @note Exceptions: None + * @note History: 6/1/89, DSJ, Created. */ PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, @@ -1247,19 +1239,18 @@ PROTOTYPE *MakeSphericalProto(CLUSTERER *Clusterer, return (Proto); } // MakeSphericalProto - /** * This routine tests the specified cluster to see if it can * be approximated by an elliptical normal distribution. If it * can be, then a new prototype is formed and returned to the * caller. 
If it can't be, then NULL is returned to the caller. - * @param Clusterer data struct containing samples being clustered - * @param Cluster cluster to be made into an elliptical prototype - * @param Statistics statistical info about cluster - * @param Buckets histogram struct used to analyze distribution - * @return Pointer to new elliptical prototype or NULL. - * @note Exceptions: None - * @note History: 6/12/89, DSJ, Created. + * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about cluster + * @param Buckets histogram struct used to analyze distribution + * @return Pointer to new elliptical prototype or NULL. + * @note Exceptions: None + * @note History: 6/12/89, DSJ, Created. */ PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, CLUSTER *Cluster, @@ -1286,7 +1277,6 @@ PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, return (Proto); } // MakeEllipticalProto - /** * This routine tests each dimension of the specified cluster to * see what distribution would best approximate that dimension. @@ -1295,14 +1285,14 @@ PROTOTYPE *MakeEllipticalProto(CLUSTERER *Clusterer, * be represented by one of these distributions, * then a new prototype is formed and returned to the * caller. If it can't be, then NULL is returned to the caller. - * @param Clusterer data struct containing samples being clustered - * @param Cluster cluster to be made into a prototype - * @param Statistics statistical info about cluster - * @param NormalBuckets histogram struct used to analyze distribution - * @param Confidence confidence level for alternate distributions - * @return Pointer to new mixed prototype or NULL. - * @note Exceptions: None - * @note History: 6/12/89, DSJ, Created. 
+ * @param Clusterer data struct containing samples being clustered + * @param Cluster cluster to be made into a prototype + * @param Statistics statistical info about cluster + * @param NormalBuckets histogram struct used to analyze distribution + * @param Confidence confidence level for alternate distributions + * @return Pointer to new mixed prototype or NULL. + * @note Exceptions: None + * @note History: 6/12/89, DSJ, Created. */ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, CLUSTER *Cluster, @@ -1355,16 +1345,15 @@ PROTOTYPE *MakeMixedProto(CLUSTERER *Clusterer, return (Proto); } // MakeMixedProto - /** * This routine alters the ith dimension of the specified * mixed prototype to be D_random. - * @param i index of dimension to be changed - * @param Proto prototype whose dimension is to be altered - * @param ParamDesc description of specified dimension - * @return None - * @note Exceptions: None - * @note History: 6/20/89, DSJ, Created. + * @param i index of dimension to be changed + * @param Proto prototype whose dimension is to be altered + * @param ParamDesc description of specified dimension + * @return None + * @note Exceptions: None + * @note History: 6/20/89, DSJ, Created. */ void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) { Proto->Distrib[i] = D_random; @@ -1380,16 +1369,15 @@ void MakeDimRandom(uinT16 i, PROTOTYPE *Proto, PARAM_DESC *ParamDesc) { // note that the proto Weight is irrelevant for D_random protos } // MakeDimRandom - /** * This routine alters the ith dimension of the specified * mixed prototype to be uniform. - * @param i index of dimension to be changed - * @param Proto prototype whose dimension is to be altered - * @param Statistics statistical info about prototype - * @return None - * @note Exceptions: None - * @note History: 6/20/89, DSJ, Created. 
+ * @param i index of dimension to be changed + * @param Proto prototype whose dimension is to be altered + * @param Statistics statistical info about prototype + * @return None + * @note Exceptions: None + * @note History: 6/20/89, DSJ, Created. */ void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) { Proto->Distrib[i] = uniform; @@ -1410,7 +1398,6 @@ void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) { // note that the proto Weight is irrelevant for uniform protos } // MakeDimUniform - /** * This routine searches the cluster tree for all leaf nodes * which are samples in the specified cluster. It computes @@ -1420,12 +1407,12 @@ void MakeDimUniform(uinT16 i, PROTOTYPE *Proto, STATISTICS *Statistics) { * return this information to the caller. An incremental * algorithm for computing statistics is not used because * it will not work with circular dimensions. - * @param N number of dimensions - * @param ParamDesc array of dimension descriptions - * @param Cluster cluster whose stats are to be computed - * @return Pointer to new data structure containing statistics - * @note Exceptions: None - * @note History: 6/2/89, DSJ, Created. + * @param N number of dimensions + * @param ParamDesc array of dimension descriptions + * @param Cluster cluster whose stats are to be computed + * @return Pointer to new data structure containing statistics + * @note Exceptions: None + * @note History: 6/2/89, DSJ, Created. */ STATISTICS * ComputeStatistics (inT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { @@ -1502,19 +1489,18 @@ ComputeStatistics (inT16 N, PARAM_DESC ParamDesc[], CLUSTER * Cluster) { return (Statistics); } // ComputeStatistics - /** * This routine creates a spherical prototype data structure to * approximate the samples in the specified cluster. * Spherical prototypes have a single variance which is * common across all dimensions. All dimensions are normally * distributed and independent. 
- * @param N number of dimensions - * @param Cluster cluster to be made into a spherical prototype - * @param Statistics statistical info about samples in cluster - * @return Pointer to a new spherical prototype data structure - * @note Exceptions: None - * @note History: 6/19/89, DSJ, Created. + * @param N number of dimensions + * @param Cluster cluster to be made into a spherical prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new spherical prototype data structure + * @note Exceptions: None + * @note History: 6/19/89, DSJ, Created. */ PROTOTYPE *NewSphericalProto(uinT16 N, CLUSTER *Cluster, @@ -1537,18 +1523,17 @@ PROTOTYPE *NewSphericalProto(uinT16 N, return (Proto); } // NewSphericalProto - /** * This routine creates an elliptical prototype data structure to * approximate the samples in the specified cluster. * Elliptical prototypes have a variance for each dimension. * All dimensions are normally distributed and independent. - * @param N number of dimensions - * @param Cluster cluster to be made into an elliptical prototype - * @param Statistics statistical info about samples in cluster - * @return Pointer to a new elliptical prototype data structure - * @note Exceptions: None - * @note History: 6/19/89, DSJ, Created. + * @param N number of dimensions + * @param Cluster cluster to be made into an elliptical prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new elliptical prototype data structure + * @note Exceptions: None + * @note History: 6/19/89, DSJ, Created. */ PROTOTYPE *NewEllipticalProto(inT16 N, CLUSTER *Cluster, @@ -1579,7 +1564,6 @@ PROTOTYPE *NewEllipticalProto(inT16 N, return (Proto); } // NewEllipticalProto - /** * This routine creates a mixed prototype data structure to * approximate the samples in the specified cluster. 
@@ -1588,12 +1572,12 @@ PROTOTYPE *NewEllipticalProto(inT16 N, * structure is initially filled in as though it were an * elliptical prototype. The actual distributions of the * dimensions can be altered by other routines. - * @param N number of dimensions - * @param Cluster cluster to be made into a mixed prototype - * @param Statistics statistical info about samples in cluster - * @return Pointer to a new mixed prototype data structure - * @note Exceptions: None - * @note History: 6/19/89, DSJ, Created. + * @param N number of dimensions + * @param Cluster cluster to be made into a mixed prototype + * @param Statistics statistical info about samples in cluster + * @return Pointer to a new mixed prototype data structure + * @note Exceptions: None + * @note History: 6/19/89, DSJ, Created. */ PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics) { PROTOTYPE *Proto; @@ -1609,16 +1593,15 @@ PROTOTYPE *NewMixedProto(inT16 N, CLUSTER *Cluster, STATISTICS *Statistics) { return (Proto); } // NewMixedProto - /** * This routine allocates memory to hold a simple prototype * data structure, i.e. one without independent distributions * and variances for each dimension. - * @param N number of dimensions - * @param Cluster cluster to be made into a prototype - * @return Pointer to new simple prototype - * @note Exceptions: None - * @note History: 6/19/89, DSJ, Created. + * @param N number of dimensions + * @param Cluster cluster to be made into a prototype + * @return Pointer to new simple prototype + * @note Exceptions: None + * @note History: 6/19/89, DSJ, Created. 
*/ PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) { PROTOTYPE *Proto; @@ -1640,7 +1623,6 @@ PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) { return (Proto); } // NewSimpleProto - /** * This routine returns TRUE if the specified covariance * matrix indicates that all N dimensions are independent of @@ -1653,13 +1635,13 @@ PROTOTYPE *NewSimpleProto(inT16 N, CLUSTER *Cluster) { * coeff[ij] = stddev[ij] / sqrt (stddev[ii] * stddev[jj]) * The covariance matrix is assumed to be symmetric (which * should always be true). - * @param ParamDesc descriptions of each feature space dimension - * @param N number of dimensions - * @param CoVariance ptr to a covariance matrix - * @param Independence max off-diagonal correlation coefficient - * @return TRUE if dimensions are independent, FALSE otherwise - * @note Exceptions: None - * @note History: 6/4/89, DSJ, Created. + * @param ParamDesc descriptions of each feature space dimension + * @param N number of dimensions + * @param CoVariance ptr to a covariance matrix + * @param Independence max off-diagonal correlation coefficient + * @return TRUE if dimensions are independent, FALSE otherwise + * @note Exceptions: None + * @note History: 6/4/89, DSJ, Created. */ BOOL8 Independent (PARAM_DESC ParamDesc[], @@ -1692,7 +1674,6 @@ inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence) { return (TRUE); } // Independent - /** * This routine returns a histogram data structure which can * be used by other routines to place samples into histogram @@ -1703,12 +1684,12 @@ inT16 N, FLOAT32 * CoVariance, FLOAT32 Independence) { * created so that it minimizes the computation time needed * to create a new bucket. * @param clusterer which keeps a bucket_cache for us. 
- * @param Distribution type of probability distribution to test for - * @param SampleCount number of samples that are available - * @param Confidence probability of a Type I error - * @return Bucket data structure + * @param Distribution type of probability distribution to test for + * @param SampleCount number of samples that are available + * @param Confidence probability of a Type I error + * @return Bucket data structure * @note Exceptions: none - * @note History: Thu Aug 3 12:58:10 1989, DSJ, Created. + * @note History: Thu Aug 3 12:58:10 1989, DSJ, Created. */ BUCKETS *GetBuckets(CLUSTERER* clusterer, DISTRIBUTION Distribution, @@ -1739,7 +1720,6 @@ BUCKETS *GetBuckets(CLUSTERER* clusterer, return Buckets; } // GetBuckets - /** * This routine creates a histogram data structure which can * be used by other routines to place samples into histogram @@ -1751,12 +1731,12 @@ BUCKETS *GetBuckets(CLUSTERER* clusterer, * order to make this possible, a mapping table is * computed which maps "normalized" samples into the * appropriate bucket. - * @param Distribution type of probability distribution to test for - * @param SampleCount number of samples that are available - * @param Confidence probability of a Type I error + * @param Distribution type of probability distribution to test for + * @param SampleCount number of samples that are available + * @param Confidence probability of a Type I error * @return Pointer to new histogram data structure - * @note Exceptions: None - * @note History: 6/4/89, DSJ, Created. + * @note Exceptions: None + * @note History: 6/4/89, DSJ, Created. */ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, uinT32 SampleCount, @@ -1840,7 +1820,6 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, return Buckets; } // MakeBuckets - /** * This routine computes the optimum number of histogram * buckets that should be used in a chi-squared goodness of @@ -1851,7 +1830,7 @@ BUCKETS *MakeBuckets(DISTRIBUTION Distribution, * values. 
The table is intended for a 0.05 level of * significance (alpha). This routine assumes that it is * equally valid for other alpha's, which may not be true. - * @param SampleCount number of samples to be tested + * @param SampleCount number of samples to be tested * @return Optimum number of histogram buckets * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -1874,7 +1853,6 @@ uinT16 OptimumNumberOfBuckets(uinT32 SampleCount) { return kBucketsTable[Last]; } // OptimumNumberOfBuckets - /** * This routine computes the chi-squared value which will * leave a cumulative probability of Alpha in the right tail @@ -1887,8 +1865,8 @@ uinT16 OptimumNumberOfBuckets(uinT32 SampleCount) { * chi-squared value. Therefore, once a particular chi-squared * value is computed, it is stored in the list and never * needs to be computed again. - * @param DegreesOfFreedom determines shape of distribution - * @param Alpha probability of right tail + * @param DegreesOfFreedom determines shape of distribution + * @param Alpha probability of right tail * @return Desired chi-squared value * @note Exceptions: none * @note History: 6/5/89, DSJ, Created. @@ -1932,19 +1910,19 @@ ComputeChiSquared (uinT16 DegreesOfFreedom, FLOAT64 Alpha) } // ComputeChiSquared - /** * This routine computes the probability density function * of a discrete normal distribution defined by the global * variables kNormalMean, kNormalVariance, and kNormalMagnitude. * Normal magnitude could, of course, be computed in terms of * the normal variance but it is precomputed for efficiency. - * @param x number to compute the normal probability density for + * @param x number to compute the normal probability density for * @note Globals: - * kNormalMean mean of a discrete normal distribution - * kNormalVariance variance of a discrete normal distribution - * kNormalMagnitude magnitude of a discrete normal distribution - * @return The value of the normal distribution at x. 
+ * kNormalMean mean of a discrete normal distribution + * kNormalVariance variance of a discrete normal distribution + * kNormalMagnitude magnitude of a discrete normal + *distribution + * @return The value of the normal distribution at x. * @note Exceptions: None * @note History: 6/4/89, DSJ, Created. */ @@ -1955,12 +1933,11 @@ FLOAT64 NormalDensity(inT32 x) { return kNormalMagnitude * exp(-0.5 * Distance * Distance / kNormalVariance); } // NormalDensity - /** * This routine computes the probability density function * of a uniform distribution at the specified point. The * range of the distribution is from 0 to BUCKETTABLESIZE. - * @param x number to compute the uniform probability density for + * @param x number to compute the uniform probability density for * @return The value of the uniform distribution at x. * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -1974,13 +1951,12 @@ FLOAT64 UniformDensity(inT32 x) { return (FLOAT64) 0.0; } // UniformDensity - /** * This routine computes a trapezoidal approximation to the * integral of a function over a small delta in x. - * @param f1 value of function at x1 - * @param f2 value of function at x2 - * @param Dx x2 - x1 (should always be positive) + * @param f1 value of function at x1 + * @param f2 value of function at x2 + * @param Dx x2 - x1 (should always be positive) * @return Approximation of the integral of the function from x1 to x2. * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -1989,7 +1965,6 @@ FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) { return (f1 + f2) * Dx / 2.0; } // Integral - /** * This routine counts the number of cluster samples which * fall within the various histogram buckets in Buckets. Only @@ -2002,12 +1977,12 @@ FLOAT64 Integral(FLOAT64 f1, FLOAT64 f2, FLOAT64 Dx) { * range and the StdDev is 1/2 the range. A dimension with * zero standard deviation cannot be statistically analyzed. * In this case, a pseudo-analysis is used. 
- * @param Buckets histogram buckets to count samples - * @param Cluster cluster whose samples are being analyzed - * @param Dim dimension of samples which is being analyzed - * @param ParamDesc description of the dimension - * @param Mean "mean" of the distribution - * @param StdDev "standard deviation" of the distribution + * @param Buckets histogram buckets to count samples + * @param Cluster cluster whose samples are being analyzed + * @param Dim dimension of samples which is being analyzed + * @param ParamDesc description of the dimension + * @param Mean "mean" of the distribution + * @param StdDev "standard deviation" of the distribution * @return None (the Buckets data structure is filled in) * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -2071,16 +2046,15 @@ void FillBuckets(BUCKETS *Buckets, } } // FillBuckets - /** * This routine determines which bucket x falls into in the * discrete normal distribution defined by kNormalMean * and kNormalStdDev. x values which exceed the range of * the discrete distribution are clipped. - * @param ParamDesc used to identify circular dimensions - * @param x value to be normalized - * @param Mean mean of normal distribution - * @param StdDev standard deviation of normal distribution + * @param ParamDesc used to identify circular dimensions + * @param x value to be normalized + * @param Mean mean of normal distribution + * @param StdDev standard deviation of normal distribution * @return Bucket number into which x falls * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -2107,16 +2081,15 @@ uinT16 NormalBucket(PARAM_DESC *ParamDesc, return (uinT16) floor((FLOAT64) X); } // NormalBucket - /** * This routine determines which bucket x falls into in the * discrete uniform distribution defined by * BUCKETTABLESIZE. x values which exceed the range of * the discrete distribution are clipped. 
- * @param ParamDesc used to identify circular dimensions - * @param x value to be normalized - * @param Mean center of range of uniform distribution - * @param StdDev 1/2 the range of the uniform distribution + * @param ParamDesc used to identify circular dimensions + * @param x value to be normalized + * @param Mean center of range of uniform distribution + * @param StdDev 1/2 the range of the uniform distribution * @return Bucket number into which x falls * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -2143,7 +2116,6 @@ uinT16 UniformBucket(PARAM_DESC *ParamDesc, return (uinT16) floor((FLOAT64) X); } // UniformBucket - /** * This routine performs a chi-square goodness of fit test * on the histogram data in the Buckets data structure. TRUE @@ -2151,7 +2123,7 @@ uinT16 UniformBucket(PARAM_DESC *ParamDesc, * distribution which was specified when the Buckets * structure was originally created. Otherwise FALSE is * returned. - * @param Buckets histogram data to perform chi-square test on + * @param Buckets histogram data to perform chi-square test on * @return TRUE if samples match distribution, FALSE otherwise * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -2176,11 +2148,10 @@ BOOL8 DistributionOK(BUCKETS *Buckets) { return TRUE; } // DistributionOK - /** * This routine frees the memory used by the statistics * data structure. - * @param Statistics pointer to data structure to be freed + * @param Statistics pointer to data structure to be freed * @return None * @note Exceptions: None * @note History: 6/5/89, DSJ, Created. @@ -2192,7 +2163,6 @@ void FreeStatistics(STATISTICS *Statistics) { memfree(Statistics); } // FreeStatistics - /** * This routine properly frees the memory used by a BUCKETS. * @@ -2204,13 +2174,12 @@ void FreeBuckets(BUCKETS *buckets) { Efree(buckets); } // FreeBuckets - /** * This routine frees the memory consumed by the specified * cluster and all of its subclusters. 
This is done by * recursive calls to FreeCluster(). * - * @param Cluster pointer to cluster to be freed + * @param Cluster pointer to cluster to be freed * * @return None * @@ -2225,7 +2194,6 @@ void FreeCluster(CLUSTER *Cluster) { } } // FreeCluster - /** * This routine computes the degrees of freedom that should * be used in a chi-squared test with the specified number of @@ -2234,8 +2202,8 @@ void FreeCluster(CLUSTER *Cluster) { * computed more easily. This will cause the value of * chi-squared to be higher than the optimum value, resulting * in the chi-square test being more lenient than optimum. - * @param Distribution distribution being tested for - * @param HistogramBuckets number of buckets in chi-square test + * @param Distribution distribution being tested for + * @param HistogramBuckets number of buckets in chi-square test * @return The number of degrees of freedom for a chi-square test * @note Exceptions: none * @note History: Thu Aug 3 14:04:18 1989, DSJ, Created. @@ -2252,7 +2220,6 @@ uinT16 DegreesOfFreedom(DISTRIBUTION Distribution, uinT16 HistogramBuckets) { } // DegreesOfFreedom - /** * This routine is used to search a list of histogram data * structures to find one with the specified number of @@ -2272,7 +2239,6 @@ int NumBucketsMatch(void *arg1, // BUCKETS *Histogram, } // NumBucketsMatch - /** * This routine is used to search a list for a list node * whose contents match Key. It is called by the list @@ -2287,13 +2253,12 @@ int ListEntryMatch(void *arg1, //ListNode } // ListEntryMatch - /** * This routine multiplies each ExpectedCount histogram entry * by NewSampleCount/OldSampleCount so that the histogram * is now adjusted to the new sample count. 
- * @param Buckets histogram data structure to adjust - * @param NewSampleCount new sample count to adjust to + * @param Buckets histogram data structure to adjust + * @param NewSampleCount new sample count to adjust to * @return none * @note Exceptions: none * @note History: Thu Aug 3 14:31:14 1989, DSJ, Created. @@ -2313,11 +2278,10 @@ void AdjustBuckets(BUCKETS *Buckets, uinT32 NewSampleCount) { } // AdjustBuckets - /** * This routine sets the bucket counts in the specified histogram * to zero. - * @param Buckets histogram data structure to init + * @param Buckets histogram data structure to init * @return none * @note Exceptions: none * @note History: Thu Aug 3 14:31:14 1989, DSJ, Created. @@ -2331,7 +2295,6 @@ void InitBuckets(BUCKETS *Buckets) { } // InitBuckets - /** * This routine is used to search a list of structures which * hold pre-computed chi-squared values for a chi-squared @@ -2355,14 +2318,13 @@ int AlphaMatch(void *arg1, //CHISTRUCT *ChiStruct } // AlphaMatch - /** * This routine allocates a new data structure which is used * to hold a chi-squared value along with its associated * number of degrees of freedom and alpha value. * - * @param DegreesOfFreedom degrees of freedom for new chi value - * @param Alpha confidence level for new chi value + * @param DegreesOfFreedom degrees of freedom for new chi value + * @param Alpha confidence level for new chi value * @return none * @note Exceptions: none * @note History: Fri Aug 4 11:04:59 1989, DSJ, Created. @@ -2377,7 +2339,6 @@ CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha) { } // NewChiStruct - /** * This routine attempts to find an x value at which Function * goes to zero (i.e. a root of the function ). It will only @@ -2385,10 +2346,10 @@ CHISTRUCT *NewChiStruct(uinT16 DegreesOfFreedom, FLOAT64 Alpha) { * are no extrema between the solution and the InitialGuess. * The algorithms used are extremely primitive. 
* - * @param Function function whose zero is to be found - * @param FunctionParams arbitrary data to pass to function - * @param InitialGuess point to start solution search at - * @param Accuracy maximum allowed error + * @param Function function whose zero is to be found + * @param FunctionParams arbitrary data to pass to function + * @param InitialGuess point to start solution search at + * @param Accuracy maximum allowed error * @return Solution of function ( x for which f(x) = 0 ). * @note Exceptions: none * @note History: Fri Aug 4 11:08:59 1989, DSJ, Created. @@ -2440,7 +2401,6 @@ void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy) } // Solve - /** * This routine computes the area under a chi density curve * from 0 to x, minus the desired area under the curve. The @@ -2455,8 +2415,8 @@ void *FunctionParams, FLOAT64 InitialGuess, FLOAT64 Accuracy) * integrating the chi density curve in parts to obtain * a series that can be used to compute the area under the * curve. - * @param ChiParams contains degrees of freedom and alpha - * @param x value of chi-squared to evaluate + * @param ChiParams contains degrees of freedom and alpha + * @param x value of chi-squared to evaluate * @return Error between actual and desired area under the chi curve. * @note Exceptions: none * @note History: Fri Aug 4 12:48:41 1989, DSJ, Created. @@ -2480,7 +2440,6 @@ FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) { } // ChiArea - /** * This routine looks at all samples in the specified cluster. * It computes a running estimate of the percentage of the @@ -2498,10 +2457,10 @@ FLOAT64 ChiArea(CHISTRUCT *ChiParams, FLOAT64 x) { * contained in the same cluster, then the cluster should be * split. 
* - * @param Clusterer data structure holding cluster tree - * @param Cluster cluster containing samples to be tested - * @param MaxIllegal max percentage of samples allowed to have - * more than 1 feature in the cluster + * @param Clusterer data structure holding cluster tree + * @param Cluster cluster containing samples to be tested + * @param MaxIllegal max percentage of samples allowed to have + * more than 1 feature in the cluster * @return TRUE if the cluster should be split, FALSE otherwise. * @note Exceptions: none * @note History: Wed Aug 30 11:13:05 1989, DSJ, Created. @@ -2562,7 +2521,7 @@ CLUSTER * Cluster, FLOAT32 MaxIllegal) } // MultipleCharSamples /** - * Compute the inverse of a matrix using LU decomposition with partial pivoting. + * Compute the inverse of a matrix using LU decomposition with partial pivoting. * The return value is the sum of norms of the off-diagonal terms of the * product of a and inv. (A measure of the error.) */ diff --git a/classify/clusttool.cpp b/classify/clusttool.cpp index d86c3a2407..02e619d273 100644 --- a/classify/clusttool.cpp +++ b/classify/clusttool.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: clustertool.c - ** Purpose: Misc. tools for use with the clustering routines - ** Author: Dan Johnson - ** History: 6/6/89, DSJ, Created. + ** Filename: clustertool.c + ** Purpose: Misc. tools for use with the clustering routines + ** Author: Dan Johnson + ** History: 6/6/89, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -26,9 +26,10 @@ #include //---------------Global Data Definitions and Declarations-------------------- -#define TOKENSIZE 80 //< max size of tokens read from an input file -#define MAXSAMPLESIZE 65535 //< max num of dimensions in feature space -//#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block size) +#define TOKENSIZE 80 //< max size of tokens read from an input file +#define MAXSAMPLESIZE 65535 //< max num of dimensions in feature space +//#define MAXBLOCKSIZE 65535 //< max num of samples in a character (block +// size) /** * This routine reads a single integer from the specified @@ -37,7 +38,7 @@ * @param File open text file to read sample size from * @return Sample size * @note Globals: None - * @note Exceptions: ILLEGALSAMPLESIZE illegal format or range + * @note Exceptions: ILLEGALSAMPLESIZE illegal format or range * @note History: 6/6/89, DSJ, Created. */ uinT16 ReadSampleSize(FILE *File) { @@ -293,7 +294,7 @@ FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) { if (NumFloatsRead != 1) { if ((NumFloatsRead == EOF) && (i == 0)) { if (needs_free) { - Efree(Buffer); + Efree(Buffer); } return NULL; } else { @@ -315,8 +316,7 @@ FLOAT32* ReadNFloats(FILE * File, uinT16 N, FLOAT32 Buffer[]) { * @note Exceptions: None * @note History: 6/6/89, DSJ, Created. */ -void -WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) { +void WriteParamDesc(FILE *File, uinT16 N, const PARAM_DESC ParamDesc[]) { int i; for (i = 0; i < N; i++) { @@ -446,15 +446,10 @@ void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) { * @note History: 6/12/89, DSJ, Created. 
*/ -void WriteProtoList( - FILE *File, - uinT16 N, - PARAM_DESC ParamDesc[], - LIST ProtoList, - BOOL8 WriteSigProtos, - BOOL8 WriteInsigProtos) -{ - PROTOTYPE *Proto; +void WriteProtoList(FILE *File, uinT16 N, PARAM_DESC ParamDesc[], + LIST ProtoList, BOOL8 WriteSigProtos, + BOOL8 WriteInsigProtos) { + PROTOTYPE *Proto; /* write file header */ fprintf(File,"%0d\n",N); @@ -464,8 +459,8 @@ void WriteProtoList( iterate(ProtoList) { Proto = (PROTOTYPE *) first_node ( ProtoList ); - if (( Proto->Significant && WriteSigProtos ) || - ( ! Proto->Significant && WriteInsigProtos ) ) - WritePrototype( File, N, Proto ); + if ((Proto->Significant && WriteSigProtos) || + (!Proto->Significant && WriteInsigProtos)) + WritePrototype(File, N, Proto); } } diff --git a/classify/clusttool.h b/classify/clusttool.h index e82fa1ef48..a6fe38ea48 100644 --- a/classify/clusttool.h +++ b/classify/clusttool.h @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: clusttool.h - ** Purpose: Definition of clustering utility tools - ** Author: Dan Johnson - ** History: 6/6/89, DSJ, Created. + ** Filename: clusttool.h + ** Purpose: Definition of clustering utility tools + ** Author: Dan Johnson + ** History: 6/6/89, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -36,7 +36,7 @@ PROTOSTYLE ReadProtoStyle(FILE *File); FLOAT32 *ReadNFloats (FILE * File, uinT16 N, FLOAT32 Buffer[]); -void WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]); +void WriteParamDesc(FILE *File, uinT16 N, const PARAM_DESC ParamDesc[]); void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto); @@ -44,13 +44,9 @@ void WriteNFloats (FILE * File, uinT16 N, FLOAT32 Array[]); void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle); -void WriteProtoList( - FILE *File, - uinT16 N, - PARAM_DESC ParamDesc[], - LIST ProtoList, - BOOL8 WriteSigProtos, - BOOL8 WriteInsigProtos); +void WriteProtoList(FILE *File, uinT16 N, PARAM_DESC ParamDesc[], + LIST ProtoList, BOOL8 WriteSigProtos, + BOOL8 WriteInsigProtos); //--------------Global Data Definitions and Declarations--------------------- // define errors that can be trapped diff --git a/classify/cutoffs.cpp b/classify/cutoffs.cpp index 4f6417149a..ffb8692ef1 100644 --- a/classify/cutoffs.cpp +++ b/classify/cutoffs.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: cutoffs.c - ** Purpose: Routines to manipulate an array of class cutoffs. - ** Author: Dan Johnson - ** History: Wed Feb 20 09:28:51 1991, DSJ, Created. + ** Filename: cutoffs.c + ** Purpose: Routines to manipulate an array of class cutoffs. + ** Author: Dan Johnson + ** History: Wed Feb 20 09:28:51 1991, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at diff --git a/classify/featdefs.cpp b/classify/featdefs.cpp index ad7b799675..dd31f91d86 100644 --- a/classify/featdefs.cpp +++ b/classify/featdefs.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: featdefs.c - ** Purpose: Definitions of currently defined feature types. - ** Author: Dan Johnson - ** History: Mon May 21 10:26:21 1990, DSJ, Created. + ** Filename: featdefs.c + ** Purpose: Definitions of currently defined feature types. + ** Author: Dan Johnson + ** History: Mon May 21 10:26:21 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -289,13 +289,13 @@ CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, * the feature type for the feature with the specified short * name. Trap an error if the specified name is not found. * - * Globals: + * Globals: * - none * * @param FeatureDefs definitions of feature types/extractors * @param ShortName short name of a feature type * @return Feature type which corresponds to ShortName. - * @note Exceptions: + * @note Exceptions: * - ILLEGAL_SHORT_NAME * @note History: Wed May 23 15:36:05 1990, DSJ, Created. */ diff --git a/classify/fpoint.cpp b/classify/fpoint.cpp index 854bea7b7e..ff5b7b7cf7 100644 --- a/classify/fpoint.cpp +++ b/classify/fpoint.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: fpoint.c - ** Purpose: Abstract data type for a 2D point (floating point coords) - ** Author: Dan Johnson - ** History: Thu Apr 12 10:44:15 1990, DSJ, Created. 
+ ** Filename: fpoint.c + ** Purpose: Abstract data type for a 2D point (floating point coords) + ** Author: Dan Johnson + ** History: Thu Apr 12 10:44:15 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -58,5 +58,4 @@ FLOAT32 NormalizedAngleFrom(FPOINT *Point1, if (Angle < 0.0 || Angle >= FullScale) Angle = 0.0; return (Angle); - } diff --git a/classify/intmatcher.cpp b/classify/intmatcher.cpp index 8fc135ea45..ff999608f2 100644 --- a/classify/intmatcher.cpp +++ b/classify/intmatcher.cpp @@ -295,7 +295,8 @@ class ClassPruner { HeapSort(num_classes_, sort_key_, sort_index_); } - /** Prints debug info on the class pruner matches for the pruned classes only. */ + /** Prints debug info on the class pruner matches for the pruned classes only. + */ void DebugMatch(const Classify& classify, const INT_TEMPLATES_STRUCT* int_templates, const INT_FEATURE_STRUCT* features) const { @@ -370,8 +371,9 @@ class ClassPruner { private: /** Array[rounded_classes_] of initial counts for each class. */ int *class_count_; - /// Array[rounded_classes_] of modified counts for each class after normalizing - /// for expected number of features, disabled classes, fragments, and xheights. + /// Array[rounded_classes_] of modified counts for each class after + /// normalizing for expected number of features, disabled classes, fragments, + /// and xheights. int *norm_count_; /** Array[rounded_classes_ +1] of pruned counts that gets sorted */ int *sort_key_; @@ -402,8 +404,9 @@ class ClassPruner { * normalization process (by CLASS_INDEX) * @param expected_num_features Array of expected number of features * for each class (by CLASS_INDEX) - * @param results Sorted Array of pruned classes. 
Must be an array - * of size at least int_templates->NumClasses. + * @param results Sorted Array of pruned classes. Must be an + * array of size at least + * int_templates->NumClasses. * @param keep_this */ int Classify::PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, @@ -606,7 +609,6 @@ int IntegerMatcher::FindGoodProtos( return NumGoodProtos; } - /** * FindBadFeatures finds all features with maximum feature-evidence < * AdaptFeatureThresh. The list is ordered by increasing feature number. @@ -701,7 +703,6 @@ void IntegerMatcher::Init(tesseract::IntParam *classify_debug_level) { evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1); } - /*---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------*/ @@ -717,8 +718,6 @@ void ScratchEvidence::ClearFeatureEvidence(const INT_CLASS class_template) { class_template->NumConfigs * sizeof(feature_evidence_[0])); } - - /** * Print debugging information for Configuations * @return none @@ -742,7 +741,6 @@ void IMDebugConfiguration(int FeatureNum, cprintf ("\n"); } - /** * Print debugging information for Configuations * @return none @@ -795,10 +793,10 @@ int IntegerMatcher::UpdateTablesForFeature( uinT32 XFeatureAddress; uinT32 YFeatureAddress; uinT32 ThetaFeatureAddress; - uinT8 *UINT8Pointer; + uinT8* UINT8Pointer; int ProtoIndex; uinT8 Temp; - int *IntPointer; + int* IntPointer; int ConfigNum; inT32 M3; inT32 A3; @@ -916,7 +914,6 @@ int IntegerMatcher::UpdateTablesForFeature( return SumOverConfigs; } - /** * Print debugging information for Configuations * @return none @@ -1165,8 +1162,6 @@ void ScratchEvidence::UpdateSumOfProtoEvidences( } } - - /** * Normalize Sum of Proto and Feature Evidence by dividing by the sum of * the Feature Lengths and the Proto Lengths for each configuration. 
@@ -1180,7 +1175,6 @@ void ScratchEvidence::NormalizeSums( } } - /** * Find the best match for the current class and update the Result * with the configuration and match rating. diff --git a/classify/intmatcher.h b/classify/intmatcher.h index 46dbfc5a8d..df678d75ed 100644 --- a/classify/intmatcher.h +++ b/classify/intmatcher.h @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: intmatcher.h - ** Purpose: Interface to high level generic classifier routines. - ** Author: Robert Moss - ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created. + ** Filename: intmatcher.h + ** Purpose: Interface to high level generic classifier routines. + ** Author: Robert Moss + ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at diff --git a/classify/intproto.cpp b/classify/intproto.cpp index 4c2f0d9536..3bbb2777a0 100644 --- a/classify/intproto.cpp +++ b/classify/intproto.cpp @@ -326,10 +326,8 @@ int AddIntProto(INT_CLASS Class) { Word < Proto->Configs + WERDS_PER_CONFIG_VEC; *Word++ = 0); return (Index); - } - /** * This routine adds Proto to the class pruning tables * for the specified class in Templates. 
@@ -372,7 +370,6 @@ void AddProtoToClassPruner (PROTO Proto, CLASS_ID ClassId, } } /* AddProtoToClassPruner */ - /** * This routine updates the proto pruner lookup tables * for Class to include a new proto identified by ProtoId @@ -432,7 +429,6 @@ void AddProtoToProtoPruner(PROTO Proto, int ProtoId, FillPPLinearBits(ProtoSet->ProtoPruner[PRUNER_Y], Index, Y, Pad, debug); } /* AddProtoToProtoPruner */ - /** * Returns a quantized bucket for the given param shifted by offset, * notionally (param + offset) * num_buckets, but clipped and casted to the @@ -550,7 +546,6 @@ void Classify::ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class) { P->A, P->B, P->C, Class->ProtoLengths[ProtoId]); } /* ConvertProto */ - /** * This routine converts from the old floating point format * to the new integer format. @@ -627,7 +622,7 @@ INT_TEMPLATES Classify::CreateIntTemplates(CLASSES FloatProtos, * @note Exceptions: none * @note History: Thu Mar 21 14:45:04 1991, DSJ, Created. */ -void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) { +void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, FLOAT32 Evidence) { ScrollView::Color color = GetMatchColorFor(Evidence); RenderIntFeature(IntMatchWindow, Feature, color); if (FeatureDisplayWindow) { @@ -635,7 +630,6 @@ void DisplayIntFeature(const INT_FEATURE_STRUCT* Feature, FLOAT32 Evidence) { } } /* DisplayIntFeature */ - /** * This routine renders the specified proto into a * global display list. @@ -720,7 +714,6 @@ void free_int_class(INT_CLASS int_class) { Efree(int_class); } - /** * This routine allocates a new set of integer templates * initialized to hold 0 classes. @@ -1218,7 +1211,6 @@ FLOAT32 BucketStart(int Bucket, FLOAT32 Offset, int NumBuckets) { } /* BucketStart */ - /** * This routine returns the parameter value which * corresponds to the end of the specified bucket. 
@@ -1236,7 +1228,6 @@ FLOAT32 BucketEnd(int Bucket, FLOAT32 Offset, int NumBuckets) { return (((FLOAT32) (Bucket + 1) / NumBuckets) - Offset); } /* BucketEnd */ - /** * This routine fills in the section of a class pruner * corresponding to a single x value for a single proto of @@ -1284,7 +1275,6 @@ void DoFill(FILL_SPEC *FillSpec, } } /* DoFill */ - /** * Return TRUE if the specified table filler is done, i.e. * if it has no more lines to fill. @@ -1306,7 +1296,6 @@ BOOL8 FillerDone(TABLE_FILLER *Filler) { } /* FillerDone */ - /** * This routine sets Bit in each bit vector whose * bucket lies within the range Center +- Spread. The fill @@ -1349,7 +1338,6 @@ void FillPPCircularBits(uinT32 ParamTable[NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR], } /* FillPPCircularBits */ - /** * This routine sets Bit in each bit vector whose * bucket lies within the range Center +- Spread. The fill @@ -1516,7 +1504,6 @@ void GetCPPadsForLevel(int Level, } /* GetCPPadsForLevel */ - /** * @param Evidence evidence value to return color for * @return Color which corresponds to specified Evidence value. @@ -1538,7 +1525,6 @@ ScrollView::Color GetMatchColorFor(FLOAT32 Evidence) { return ScrollView::BLUE; } /* GetMatchColorFor */ - /** * This routine returns (in Fill) the specification of * the next line to be filled from Filler. 
FillerDone() should @@ -1589,7 +1575,6 @@ void GetNextFill(TABLE_FILLER *Filler, FILL_SPEC *Fill) { } /* GetNextFill */ - /** * This routine computes a data structure (Filler) * which can be used to fill in a rectangle surrounding @@ -1787,7 +1772,6 @@ void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT* Feature, window->DrawTo(X + Dx, Y + Dy); } /* RenderIntFeature */ - /** * This routine extracts the parameters of the specified * proto from the class description and adds a rendering of diff --git a/classify/kdtree.cpp b/classify/kdtree.cpp index 61a94f66cc..6ba7086d23 100644 --- a/classify/kdtree.cpp +++ b/classify/kdtree.cpp @@ -70,11 +70,11 @@ class MinK { const Element* elements() { return elements_; } private: - const Key max_key_; //< the maximum possible Key - Element* elements_; //< unsorted array of elements + const Key max_key_; //< the maximum possible Key + Element *elements_; //< unsorted array of elements int elements_count_; //< the number of results collected so far - int k_; //< the number of results we want from the search - int max_index_; //< the index of the result with the largest key + int k_; //< the number of results we want from the search + int max_index_; //< the index of the result with the largest key }; template @@ -117,7 +117,8 @@ bool MinK::insert(Key key, Value value) { //----------------------------------------------------------------------------- -/** Helper class for searching for the k closest points to query_point in tree. */ +/** Helper class for searching for the k closest points to query_point in tree. + */ class KDTreeSearch { public: KDTreeSearch(KDTREE* tree, FLOAT32 *query_point, int k_closest); @@ -241,14 +242,13 @@ void KDStore(KDTREE *Tree, FLOAT32 *Key, void *Data) { *PtrToNode = MakeKDNode(Tree, Key, (void *) Data, Level); } /* KDStore */ - /** - * This routine deletes a node from Tree. The node to be - * deleted is specified by the Key for the node and the Data - * contents of the node. 
These two pointers must be identical - * to the pointers that were used for the node when it was - * originally stored in the tree. A node will be deleted from - * the tree only if its key and data pointers are identical + * This routine deletes a node from Tree. The node to be + * deleted is specified by the Key for the node and the Data + * contents of the node. These two pointers must be identical + * to the pointers that were used for the node when it was + * originally stored in the tree. A node will be deleted from + * the tree only if its key and data pointers are identical * to Key and Data respectively. The tree is re-formed by removing * the affected subtree and inserting all elements but the root. * @@ -298,7 +298,6 @@ KDDelete (KDTREE * Tree, FLOAT32 Key[], void *Data) { } } /* KDDelete */ - /** * This routine searches the K-D tree specified by Tree and * finds the QuerySize nearest neighbors of Query. All neighbors @@ -442,7 +441,7 @@ void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) { /*---------------------------------------------------------------------------*/ -/** +/** *Returns the Euclidean distance squared between p1 and p2 for all essential * dimensions. * @param k keys are in k-space @@ -541,7 +540,6 @@ void Walk(KDTREE *tree, void_proc action, void *context, Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level)); } - /** Given a subtree nodes, insert all of its elements into tree. */ void InsertNodes(KDTREE *tree, KDNODE *nodes) { if (nodes == NULL) diff --git a/classify/mf.cpp b/classify/mf.cpp index d0c59487e6..37cd2eca0a 100644 --- a/classify/mf.cpp +++ b/classify/mf.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: mf.c - ** Purpose: Micro-feature interface to flexible feature extractor. - ** Author: Dan Johnson - ** History: Thu May 24 09:08:38 1990, DSJ, Created. 
+ ** Filename: mf.c + ** Purpose: Micro-feature interface to flexible feature extractor. + ** Author: Dan Johnson + ** History: Thu May 24 09:08:38 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -36,7 +36,7 @@ * Call the old micro-feature extractor and then copy * the features into the new format. Then deallocate the * old micro-features. - * @param Blob blob to extract micro-features from + * @param Blob blob to extract micro-features from * @param cn_denorm control parameter to feature extractor. * @return Micro-features for Blob. * @note Exceptions: none diff --git a/classify/mfdefs.cpp b/classify/mfdefs.cpp index abe8d0c71a..0f225e8b08 100644 --- a/classify/mfdefs.cpp +++ b/classify/mfdefs.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: mfdefs.c - ** Purpose: Basic routines for manipulating micro-features - ** Author: Dan Johnson - ** History: Mon Jan 22 08:48:58 1990, DSJ, Created. + ** Filename: mfdefs.c + ** Purpose: Basic routines for manipulating micro-features + ** Author: Dan Johnson + ** History: Mon Jan 22 08:48:58 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -32,7 +32,7 @@ * @return New MICROFEATURE * @note History: 7/27/89, DSJ, Created. 
*/ -MICROFEATURE NewMicroFeature() { +MICROFEATURE NewMicroFeature() { return ((MICROFEATURE) Emalloc (sizeof (MFBLOCK))); } /* NewMicroFeature */ @@ -41,10 +41,10 @@ MICROFEATURE NewMicroFeature() { /** * This routine deallocates all of the memory consumed by * a list of micro-features. - * @param MicroFeatures list of micro-features to be freed + * @param MicroFeatures list of micro-features to be freed * @return none * @note History: 7/27/89, DSJ, Created. */ -void FreeMicroFeatures(MICROFEATURES MicroFeatures) { +void FreeMicroFeatures(MICROFEATURES MicroFeatures) { destroy_nodes(MicroFeatures, Efree); } /* FreeMicroFeatures */ diff --git a/classify/mfoutline.cpp b/classify/mfoutline.cpp index 511c34d41f..59593a8523 100644 --- a/classify/mfoutline.cpp +++ b/classify/mfoutline.cpp @@ -35,7 +35,8 @@ ----------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ -/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). */ +/** Convert a blob into a list of MFOUTLINEs (float-based microfeature format). 
+ */ LIST ConvertBlob(TBLOB *blob) { LIST outlines = NIL_LIST; return (blob == NULL) @@ -344,7 +345,6 @@ void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { } /* ChangeDirection */ - /** * This routine normalizes each point in Outline by * translating it to the specified center and scaling it @@ -378,7 +378,6 @@ void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { } /* CharNormalizeOutline */ - /** * This routine computes the slope from Start to Finish and * and then computes the approximate direction of the line diff --git a/classify/mfx.cpp b/classify/mfx.cpp index 3da4fb3d0f..6fd8ed5da5 100644 --- a/classify/mfx.cpp +++ b/classify/mfx.cpp @@ -128,7 +128,6 @@ FLOAT32 ComputeOrientation(MFEDGEPT *Start, MFEDGEPT *End) { return (Orientation); } /* ComputeOrientation */ - /** * Convert Outline to MicroFeatures * @param Outline outline to extract micro-features from @@ -164,7 +163,6 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, return (MicroFeatures); } /* ConvertToMicroFeatures */ - /** * This routine computes the feature parameters which describe * the micro-feature that starts and Start and ends at End. @@ -178,7 +176,7 @@ MICROFEATURES ConvertToMicroFeatures(MFOUTLINE Outline, * @return New micro-feature or NULL if the feature was rejected. * @note Globals: none * @note Exceptions: none - * @note History: + * @note History: * - 7/26/89, DSJ, Created. * - 11/17/89, DSJ, Added handling for Start and End same point. */ diff --git a/classify/mfx.h b/classify/mfx.h index 05ce29cee5..5ed006dcc7 100644 --- a/classify/mfx.h +++ b/classify/mfx.h @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: mfx.h - ** Purpose: Definition of micro-feature extraction routines - ** Author: Dan Johnson - ** History: 5/29/89, DSJ, Created. 
+ ** Filename: mfx.h + ** Purpose: Definition of micro-feature extraction routines + ** Author: Dan Johnson + ** History: 5/29/89, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at diff --git a/classify/normfeat.cpp b/classify/normfeat.cpp index a4ac672a11..f297b3b05d 100644 --- a/classify/normfeat.cpp +++ b/classify/normfeat.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: normfeat.c - ** Purpose: Definition of char normalization features. - ** Author: Dan Johnson - ** History: 12/14/90, DSJ, Created. + ** Filename: normfeat.c + ** Purpose: Definition of char normalization features. + ** Author: Dan Johnson + ** History: 12/14/90, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -33,7 +33,6 @@ FLOAT32 ActualOutlineLength(FEATURE Feature) { return (Feature->Params[CharNormLength] * LENGTH_COMPRESSION); } - /** * Return the character normalization feature for a blob. * diff --git a/classify/normmatch.cpp b/classify/normmatch.cpp index 488cd1652f..b0e066a89d 100644 --- a/classify/normmatch.cpp +++ b/classify/normmatch.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: normmatch.c - ** Purpose: Simple matcher based on character normalization features. - ** Author: Dan Johnson - ** History: Wed Dec 19 16:18:06 1990, DSJ, Created. + ** Filename: normmatch.c + ** Purpose: Simple matcher based on character normalization features. 
+ ** Author: Dan Johnson + ** History: Wed Dec 19 16:18:06 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -197,10 +197,10 @@ double NormEvidenceOf(register double NormAdj) { /*---------------------------------------------------------------------------*/ /** * This routine dumps out detailed normalization match info. - * @param File open text file to dump match debug info to - * @param NumParams # of parameters in proto and feature - * @param Proto[] array of prototype parameters - * @param Feature[] array of feature parameters + * @param File open text file to dump match debug info to + * @param NumParams # of parameters in proto and feature + * @param Proto[] array of prototype parameters + * @param Feature[] array of feature parameters * Globals: none * @return none * @note Exceptions: none diff --git a/classify/ocrfeatures.cpp b/classify/ocrfeatures.cpp index 0895ed0886..7df8135048 100644 --- a/classify/ocrfeatures.cpp +++ b/classify/ocrfeatures.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: features.c - ** Purpose: Generic definition of a feature. - ** Author: Dan Johnson - ** History: Mon May 21 10:49:04 1990, DSJ, Created. + ** Filename: features.c + ** Purpose: Generic definition of a feature. + ** Author: Dan Johnson + ** History: Mon May 21 10:49:04 1990, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -66,12 +66,11 @@ void FreeFeature(FEATURE Feature) { } /* FreeFeature */ - /** * Release the memory consumed by the specified feature * set. This routine also frees the memory consumed by the * features contained in the set. - * @param FeatureSet set of features to be freed + * @param FeatureSet set of features to be freed * @return none * @note History: Mon May 21 13:59:46 1990, DSJ, Created. */ @@ -85,11 +84,10 @@ void FreeFeatureSet(FEATURE_SET FeatureSet) { } } /* FreeFeatureSet */ - /** * Allocate and return a new feature of the specified * type. - * @param FeatureDesc description of feature to be created. + * @param FeatureDesc description of feature to be created. * @return New #FEATURE. * @note History: Mon May 21 14:06:42 1990, DSJ, Created. */ @@ -105,11 +103,10 @@ FEATURE NewFeature(const FEATURE_DESC_STRUCT* FeatureDesc) { } /* NewFeature */ - /** * Allocate and return a new feature set large enough to * hold the specified number of features. - * @param NumFeatures maximum # of features to be put in feature set + * @param NumFeatures maximum # of features to be put in feature set * @return New #FEATURE_SET. * @note History: Mon May 21 14:22:40 1990, DSJ, Created. */ @@ -124,7 +121,6 @@ FEATURE_SET NewFeatureSet(int NumFeatures) { } /* NewFeatureSet */ - /** * Create a new feature of the specified type and read in * the value of its parameters from File. The extra penalty @@ -135,10 +131,11 @@ FEATURE_SET NewFeatureSet(int NumFeatures) { * @param File open text file to read feature from * @param FeatureDesc specifies type of feature to read from File * @return New #FEATURE read from File. - * @note Exceptions: #ILLEGAL_FEATURE_PARAM if text file doesn't match expected format + * @note Exceptions: #ILLEGAL_FEATURE_PARAM if text file doesn't match expected + * format * @note History: Wed May 23 08:53:16 1990, DSJ, Created. 
*/ -FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { +FEATURE ReadFeature(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { FEATURE Feature; int i; @@ -153,7 +150,6 @@ FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { return (Feature); } /* ReadFeature */ - /** * Create a new feature set of the specified type and read in * the features from File. The correct text representation @@ -165,7 +161,7 @@ FEATURE ReadFeature(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { * @return New feature set read from File. * @note History: Wed May 23 09:17:31 1990, DSJ, Created. */ -FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { +FEATURE_SET ReadFeatureSet(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { FEATURE_SET FeatureSet; int NumFeatures; int i; @@ -180,7 +176,6 @@ FEATURE_SET ReadFeatureSet(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { return (FeatureSet); } /* ReadFeatureSet */ - /** * Appends a textual representation of Feature to str. * This representation is simply a list of the N parameters @@ -203,7 +198,6 @@ void WriteFeature(FEATURE Feature, STRING* str) { *str += "\n"; } /* WriteFeature */ - /** * Write a textual representation of FeatureSet to File. * This representation is an integer specifying the number of @@ -224,7 +218,6 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) { } } /* WriteFeatureSet */ - /** * Write a textual representation of FeatureDesc to File * in the old format (i.e. the format used by the clusterer). @@ -240,7 +233,7 @@ void WriteFeatureSet(FEATURE_SET FeatureSet, STRING* str) { * @return none * @note History: Fri May 25 15:27:18 1990, DSJ, Created. 
*/ -void WriteOldParamDesc(FILE *File, const FEATURE_DESC_STRUCT* FeatureDesc) { +void WriteOldParamDesc(FILE* File, const FEATURE_DESC_STRUCT* FeatureDesc) { int i; fprintf (File, "%d\n", FeatureDesc->NumParams); diff --git a/classify/outfeat.cpp b/classify/outfeat.cpp index b1a4a9be90..76597f7c15 100644 --- a/classify/outfeat.cpp +++ b/classify/outfeat.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: outfeat.c - ** Purpose: Definition of outline-features. - ** Author: Dan Johnson - ** History: 11/13/90, DSJ, Created. + ** Filename: outfeat.c + ** Purpose: Definition of outline-features. + ** Author: Dan Johnson + ** History: 11/13/90, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -40,7 +40,7 @@ namespace tesseract { * @return Outline-features for Blob. * @note Globals: none * @note Exceptions: none - * @note History: + * @note History: * - 11/13/90, DSJ, Created. * - 05/24/91, DSJ, Updated for either char or baseline normalize. */ @@ -115,7 +115,7 @@ void AddOutlineFeatureToSet(FPOINT *Start, * @return none (results are returned in FeatureSet) * @note Globals: none * @note Exceptions: none - * @note History: + * @note History: * - 11/13/90, DSJ, Created. * - 5/24/91, DSJ, Added hidden edge capability. */ diff --git a/classify/picofeat.cpp b/classify/picofeat.cpp index 74beb18f35..a4a39263cf 100644 --- a/classify/picofeat.cpp +++ b/classify/picofeat.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: picofeat.c - ** Purpose: Definition of pico-features. - ** Author: Dan Johnson - ** History: 9/4/90, DSJ, Created. + ** Filename: picofeat.c + ** Purpose: Definition of pico-features. 
+ ** Author: Dan Johnson + ** History: 9/4/90, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at @@ -98,7 +98,7 @@ FEATURE_SET Classify::ExtractPicoFeatures(TBLOB *Blob) { * nearest whole number of pico-features. The pico-features * are spaced evenly over the entire segment. * Globals: - * - classify_pico_feature_length length of a single pico-feature + * - classify_pico_feature_length length of a single pico-feature * @param Start starting point of pico-feature * @param End ending point of pico-feature * @param FeatureSet set to add pico-feature to diff --git a/cutil/bitvec.h b/cutil/bitvec.h index f70d748b91..d2a364d261 100644 --- a/cutil/bitvec.h +++ b/cutil/bitvec.h @@ -30,29 +30,29 @@ typedef uinT32 *BIT_VECTOR; /*----------------------------------------------------------------------------- Public Function Prototypes -----------------------------------------------------------------------------*/ -#define zero_all_bits(array,length) \ -{\ - int index; /*temporary index*/\ -\ -for (index=0;indexparams()), STRING_INIT_MEMBER(user_words_suffix, "", "A suffix of user-provided words located in tessdata.", @@ -54,33 +53,41 @@ Dict::Dict(CCUtil* ccutil) getCCUtil()->params()), BOOL_INIT_MEMBER(load_unambig_dawg, true, "Load unambiguous word dawg.", getCCUtil()->params()), - BOOL_INIT_MEMBER(load_punc_dawg, true, "Load dawg with punctuation" - " patterns.", getCCUtil()->params()), - BOOL_INIT_MEMBER(load_number_dawg, true, "Load dawg with number" - " patterns.", getCCUtil()->params()), - BOOL_INIT_MEMBER(load_bigram_dawg, true, "Load dawg with special word " - "bigrams.", getCCUtil()->params()), + BOOL_INIT_MEMBER(load_punc_dawg, true, + "Load dawg with punctuation" + " patterns.", + getCCUtil()->params()), + 
BOOL_INIT_MEMBER(load_number_dawg, true, + "Load dawg with number" + " patterns.", + getCCUtil()->params()), + BOOL_INIT_MEMBER(load_bigram_dawg, true, + "Load dawg with special word " + "bigrams.", + getCCUtil()->params()), double_MEMBER(xheight_penalty_subscripts, 0.125, "Score penalty (0.1 = 10%) added if there are subscripts " "or superscripts in a word, but it is otherwise OK.", getCCUtil()->params()), double_MEMBER(xheight_penalty_inconsistent, 0.25, "Score penalty (0.1 = 10%) added if an xheight is " - "inconsistent.", getCCUtil()->params()), + "inconsistent.", + getCCUtil()->params()), double_MEMBER(segment_penalty_dict_frequent_word, 1.0, "Score multiplier for word matches which have good case and" "are frequent in the given language (lower is better).", getCCUtil()->params()), double_MEMBER(segment_penalty_dict_case_ok, 1.1, "Score multiplier for word matches that have good case " - "(lower is better).", getCCUtil()->params()), + "(lower is better).", + getCCUtil()->params()), double_MEMBER(segment_penalty_dict_case_bad, 1.3125, "Default score multiplier for word matches, which may have " "case issues (lower is better).", getCCUtil()->params()), double_MEMBER(segment_penalty_ngram_best_choice, 1.24, - "Multipler to for the best choice from the ngram model.", - getCCUtil()->params()), + "Multipler to for the best choice from the ngram model.", + getCCUtil()->params()), double_MEMBER(segment_penalty_dict_nonword, 1.25, "Score multiplier for glyph fragment segmentations which " "do not match a dictionary word (lower is better).", @@ -88,11 +95,13 @@ Dict::Dict(CCUtil* ccutil) double_MEMBER(segment_penalty_garbage, 1.50, "Score multiplier for poorly cased strings that are not in" " the dictionary and generally look like garbage (lower is" - " better).", getCCUtil()->params()), + " better).", + getCCUtil()->params()), STRING_MEMBER(output_ambig_words_file, "", "Output file for ambiguities found in the dictionary", getCCUtil()->params()), - 
INT_MEMBER(dawg_debug_level, 0, "Set to 1 for general debug info" + INT_MEMBER(dawg_debug_level, 0, + "Set to 1 for general debug info" ", to 2 for more details, to 3 to see all the debug messages", getCCUtil()->params()), INT_MEMBER(hyphen_debug_level, 0, "Debug level for hyphenated words.", @@ -109,12 +118,12 @@ Dict::Dict(CCUtil* ccutil) "Certainty threshold for non-dict words", getCCUtil()->params()), double_MEMBER(stopper_phase2_certainty_rejection_offset, 1.0, - "Reject certainty offset", - getCCUtil()->params()), + "Reject certainty offset", getCCUtil()->params()), INT_MEMBER(stopper_smallword_size, 2, "Size of dict word to be treated as non-dict word", getCCUtil()->params()), - double_MEMBER(stopper_certainty_per_char, -0.50, "Certainty to add" + double_MEMBER(stopper_certainty_per_char, -0.50, + "Certainty to add" " for each dict char above small word size.", getCCUtil()->params()), double_MEMBER(stopper_allowable_character_badness, 3.0, @@ -130,9 +139,9 @@ Dict::Dict(CCUtil* ccutil) "Deprecated- backward compatibility only", getCCUtil()->params()), INT_MEMBER(tessedit_truncate_wordchoice_log, 10, - "Max words to keep in list", - getCCUtil()->params()), - STRING_MEMBER(word_to_debug, "", "Word for which stopper debug" + "Max words to keep in list", getCCUtil()->params()), + STRING_MEMBER(word_to_debug, "", + "Word for which stopper debug" " information should be printed to stdout", getCCUtil()->params()), STRING_MEMBER(word_to_debug_lengths, "", @@ -141,10 +150,10 @@ Dict::Dict(CCUtil* ccutil) INT_MEMBER(fragments_debug, 0, "Debug character fragments", getCCUtil()->params()), BOOL_MEMBER(segment_nonalphabetic_script, false, - "Don't use any alphabetic-specific tricks." - "Set to true in the traineddata config file for" - " scripts that are cursive or inherently fixed-pitch", - getCCUtil()->params()), + "Don't use any alphabetic-specific tricks." 
+ "Set to true in the traineddata config file for" + " scripts that are cursive or inherently fixed-pitch", + getCCUtil()->params()), BOOL_MEMBER(save_doc_words, 0, "Save Document Words", getCCUtil()->params()), double_MEMBER(doc_dict_pending_threshold, 0.0, @@ -152,8 +161,11 @@ Dict::Dict(CCUtil* ccutil) getCCUtil()->params()), double_MEMBER(doc_dict_certainty_threshold, -2.25, "Worst certainty for words that can be inserted into the" - "document dictionary", getCCUtil()->params()), - INT_MEMBER(max_permuter_attempts, 10000, "Maximum number of different" + "document dictionary", + getCCUtil()->params()), + INT_MEMBER(max_permuter_attempts, 10000, + "Maximum number of different" + " character choices to consider during permutation." " This limit is especially useful when user patterns" " are specified, since overly generic patterns can result in" diff --git a/dict/stopper.cpp b/dict/stopper.cpp index 660b4c8cf3..a0e3c02b2e 100644 --- a/dict/stopper.cpp +++ b/dict/stopper.cpp @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: stopper.c - ** Purpose: Stopping criteria for word classifier. - ** Author: Dan Johnson - ** History: Mon Apr 29 14:56:49 1991, DSJ, Created. + ** Filename: stopper.c + ** Purpose: Stopping criteria for word classifier. + ** Author: Dan Johnson + ** History: Mon Apr 29 14:56:49 1991, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. 
** You may obtain a copy of the License at @@ -41,7 +41,6 @@ #pragma warning(disable:4800) // int/bool warnings #endif -using tesseract::ScriptPos; /*---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------*/ diff --git a/dict/stopper.h b/dict/stopper.h index b028b0ee29..58d23734dd 100644 --- a/dict/stopper.h +++ b/dict/stopper.h @@ -1,10 +1,10 @@ /****************************************************************************** - ** Filename: stopper.h - ** Purpose: Stopping criteria for word classifier. - ** Author: Dan Johnson - ** History: Wed May 1 09:42:57 1991, DSJ, Created. + ** Filename: stopper.h + ** Purpose: Stopping criteria for word classifier. + ** Author: Dan Johnson + ** History: Wed May 1 09:42:57 1991, DSJ, Created. ** - ** (c) Copyright Hewlett-Packard Company, 1988. + ** (c) Copyright Hewlett-Packard Company, 1988. ** Licensed under the Apache License, Version 2.0 (the "License"); ** you may not use this file except in compliance with the License. ** You may obtain a copy of the License at diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index 077b7b215f..2a0420fb7c 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -8,9 +8,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #ifdef _WIN32 -#include #include - #else #include #include @@ -25,23 +23,24 @@ #include "thresholder.h" #if ON_APPLE -#include #include +#include #endif /* Convenience macro to test the version of Leptonica. 
*/ #if defined(LIBLEPT_MAJOR_VERSION) && defined(LIBLEPT_MINOR_VERSION) -# define TESSERACT_LIBLEPT_PREREQ(maj, min) \ - ((LIBLEPT_MAJOR_VERSION) > (maj) || ((LIBLEPT_MAJOR_VERSION) == (maj) && (LIBLEPT_MINOR_VERSION) >= (min))) +#define TESSERACT_LIBLEPT_PREREQ(maj, min) \ + ((LIBLEPT_MAJOR_VERSION) > (maj) || \ + ((LIBLEPT_MAJOR_VERSION) == (maj) && (LIBLEPT_MINOR_VERSION) >= (min))) #else -# define TESSERACT_LIBLEPT_PREREQ(maj, min) 0 +#define TESSERACT_LIBLEPT_PREREQ(maj, min) 0 #endif -#if TESSERACT_LIBLEPT_PREREQ(1,73) -# define CALLOC LEPT_CALLOC -# define FREE LEPT_FREE +#if TESSERACT_LIBLEPT_PREREQ(1, 73) +#define CALLOC LEPT_CALLOC +#define FREE LEPT_FREE #endif #ifdef USE_OPENCL @@ -49,36 +48,28 @@ #include "opencl_device_selection.h" GPUEnv OpenclDevice::gpuEnv; - bool OpenclDevice::deviceIsSelected = false; ds_device OpenclDevice::selectedDevice; - int OpenclDevice::isInited = 0; static l_int32 MORPH_BC = ASYMMETRIC_MORPH_BC; static const l_uint32 lmask32[] = { - 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, - 0xf8000000, 0xfc000000, 0xfe000000, 0xff000000, - 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000, - 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, - 0xffff8000, 0xffffc000, 0xffffe000, 0xfffff000, - 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00, - 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, - 0xfffffff8, 0xfffffffc, 0xfffffffe, 0xffffffff -}; + 0x80000000, 0xc0000000, 0xe0000000, 0xf0000000, 0xf8000000, 0xfc000000, + 0xfe000000, 0xff000000, 0xff800000, 0xffc00000, 0xffe00000, 0xfff00000, + 0xfff80000, 0xfffc0000, 0xfffe0000, 0xffff0000, 0xffff8000, 0xffffc000, + 0xffffe000, 0xfffff000, 0xfffff800, 0xfffffc00, 0xfffffe00, 0xffffff00, + 0xffffff80, 0xffffffc0, 0xffffffe0, 0xfffffff0, 0xfffffff8, 0xfffffffc, + 0xfffffffe, 0xffffffff}; static const l_uint32 rmask32[] = { - 0x00000001, 0x00000003, 0x00000007, 0x0000000f, - 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, - 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, - 0x00001fff, 
0x00003fff, 0x00007fff, 0x0000ffff, - 0x0001ffff, 0x0003ffff, 0x0007ffff, 0x000fffff, - 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, - 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, - 0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff -}; + 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, + 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, + 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, 0x0001ffff, 0x0003ffff, + 0x0007ffff, 0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff, 0x00ffffff, + 0x01ffffff, 0x03ffffff, 0x07ffffff, 0x0fffffff, 0x1fffffff, 0x3fffffff, + 0x7fffffff, 0xffffffff}; struct tiff_transform { int vflip; /* if non-zero, image needs a vertical fip */ @@ -99,7 +90,7 @@ static struct tiff_transform tiff_orientation_transforms[] = { {0, 0, -1} }; -static const l_int32 MAX_PAGES_IN_TIFF_FILE = 3000; +static const l_int32 MAX_PAGES_IN_TIFF_FILE = 3000; cl_mem pixsCLBuffer, pixdCLBuffer, pixdCLIntermediate; //Morph operations buffers cl_mem pixThBuffer; //output from thresholdtopix calculation @@ -109,7 +100,8 @@ KernelEnv rEnv; // substitute invalid characters in device name with _ void legalizeFileName( char *fileName) { //printf("fileName: %s\n", fileName); - const char* invalidChars = "/\?:*\"><| "; // space is valid but can cause headaches + const char *invalidChars = + "/\?:*\"><| "; // space is valid but can cause headaches // for each invalid char for (int i = 0; i < strlen(invalidChars); i++) { char invalidStr[4]; @@ -152,7 +144,6 @@ void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) { cl_command_queue_properties queueProperties = 0; gpuInfo->mpCmdQueue = clCreateCommandQueue( gpuInfo->mpContext, gpuInfo->mpDevID, queueProperties, &clStatus ); CHECK_OPENCL( clStatus, "populateGPUEnv::createCommandQueue"); - } int OpenclDevice::LoadOpencl() @@ -168,7 +159,6 @@ int OpenclDevice::LoadOpencl() fprintf(stderr, "[OD] Load opencl.dll failed!\n"); FreeLibrary( static_cast( OpenclDll ) ); return 
0; - } fprintf(stderr, "[OD] Load opencl.dll successful!\n"); #endif @@ -191,42 +181,37 @@ cl_mem allocateZeroCopyBuffer(KernelEnv rEnv, l_uint32 *hostbuffer, size_t nElem return membuffer; } -PIX* mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX* pixd, PIX* pixs, int elements, cl_mem_flags flags, bool memcopy = false, bool sync = true) -{ - PROCNAME("mapOutputCLBuffer"); - if (!pixd) - { - if (memcopy) - { - if ((pixd = pixCreateTemplate(pixs)) == NULL) - (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - else - { - if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs), pixGetDepth(pixs))) == NULL) - (PIX *)ERROR_PTR("pixd not made", procName, NULL); - } - } - l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer(rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0, - elements * sizeof(l_uint32), 0, NULL, NULL, NULL ); - - if (memcopy) - { - memcpy(pixGetData(pixd), pValues, elements * sizeof(l_uint32)); - } - else - { - pixSetData(pixd, pValues); +PIX *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX *pixd, PIX *pixs, + int elements, cl_mem_flags flags, bool memcopy = false, + bool sync = true) { + PROCNAME("mapOutputCLBuffer"); + if (!pixd) { + if (memcopy) { + if ((pixd = pixCreateTemplate(pixs)) == NULL) + (PIX *)ERROR_PTR("pixd not made", procName, NULL); + } else { + if ((pixd = pixCreateHeader(pixGetWidth(pixs), pixGetHeight(pixs), + pixGetDepth(pixs))) == NULL) + (PIX *)ERROR_PTR("pixd not made", procName, NULL); } + } + l_uint32 *pValues = (l_uint32 *)clEnqueueMapBuffer( + rEnv.mpkCmdQueue, clbuffer, CL_TRUE, flags, 0, + elements * sizeof(l_uint32), 0, NULL, NULL, NULL); + + if (memcopy) { + memcpy(pixGetData(pixd), pValues, elements * sizeof(l_uint32)); + } else { + pixSetData(pixd, pValues); + } - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,clbuffer,pValues,0,NULL,NULL); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, clbuffer, pValues, 0, NULL, NULL); - if (sync) - { - clFinish( rEnv.mpkCmdQueue ); - } + if (sync) { + 
clFinish(rEnv.mpkCmdQueue); + } - return pixd; + return pixd; } cl_mem allocateIntBuffer( KernelEnv rEnv, const l_uint32 *_pValues, size_t nElements, cl_int *pStatus , bool sync = false) @@ -261,7 +246,7 @@ void OpenclDevice::releaseMorphCLBuffers() clReleaseMemObject(pixdCLBuffer); if (pixThBuffer != NULL) clReleaseMemObject(pixThBuffer); - pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = NULL; + pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = NULL; } int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs) @@ -305,7 +290,6 @@ PERF_COUNT_SUB("LoadOpencl") #endif // sets up environment, compiles programs - InitOpenclRunEnv_DeviceSelection( 0 ); //PERF_COUNT_SUB("called InitOpenclRunEnv_DS") //PERF_COUNT_END @@ -418,17 +402,18 @@ int OpenclDevice::BinaryGenerated( const char * clFileName, FILE ** fhandle ) int status = 0; char *str = NULL; FILE *fd = NULL; - char fileName[256] = { 0 }, cl_name[128] = { 0 }; + char fileName[256] = {0}, cl_name[128] = {0}; char deviceName[1024]; - clStatus = clGetDeviceInfo( gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL ); - CHECK_OPENCL( clStatus, "clGetDeviceInfo" ); - str = (char*) strstr( clFileName, (char*) ".cl" ); - memcpy( cl_name, clFileName, str - clFileName ); + clStatus = clGetDeviceInfo(gpuEnv.mpArryDevsID[i], CL_DEVICE_NAME, + sizeof(deviceName), deviceName, NULL); + CHECK_OPENCL(clStatus, "clGetDeviceInfo"); + str = (char *)strstr(clFileName, (char *)".cl"); + memcpy(cl_name, clFileName, str - clFileName); cl_name[str - clFileName] = '\0'; - sprintf( fileName, "%s-%s.bin", cl_name, deviceName ); + sprintf(fileName, "%s-%s.bin", cl_name, deviceName); legalizeFileName(fileName); - fd = fopen( fileName, "rb" ); - status = ( fd != NULL ) ? 1 : 0; + fd = fopen(fileName, "rb"); + status = (fd != NULL) ? 
1 : 0; if ( fd != NULL ) { *fhandle = fd; @@ -848,7 +833,6 @@ PIX *pix; fclose(fp); PERF_COUNT_END return pix; - } TIFF * OpenclDevice::fopenTiffCl(FILE *fp, @@ -1049,8 +1033,8 @@ tiffCloseCallback(thandle_t handle) *mstream->poutdata = mstream->buffer; *mstream->poutsize = mstream->hw; } - FREE(mstream); /* never free the buffer! */ - return 0; + FREE(mstream); /* never free the buffer! */ + return 0; } @@ -1144,38 +1128,38 @@ OpenclDevice::pixReadMemTiffCl(const l_uint8 *data,size_t size,l_int32 n) l_int32 i, pagefound; PIX *pix; TIFF *tif; - //L_MEMSTREAM *memStream; - PROCNAME("pixReadMemTiffCl"); - - if (!data) - return (PIX *)ERROR_PTR("data pointer is NULL", procName, NULL); - - if ((tif = fopenTiffMemstream("", "r", (l_uint8 **)&data, &size)) == NULL) - return (PIX *)ERROR_PTR("tif not opened", procName, NULL); - - pagefound = FALSE; - pix = NULL; - for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) { - if (i == n) { - pagefound = TRUE; - if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) { - TIFFCleanup(tif); - return (PIX *)ERROR_PTR("pix not read", procName, NULL); - } - break; - } - if (TIFFReadDirectory(tif) == 0) - break; - } + // L_MEMSTREAM *memStream; + PROCNAME("pixReadMemTiffCl"); - if (pagefound == FALSE) { - L_WARNING("tiff page %d not found", procName, i); - TIFFCleanup(tif); - return NULL; - } + if (!data) + return (PIX *)ERROR_PTR("data pointer is NULL", procName, NULL); + + if ((tif = fopenTiffMemstream("", "r", (l_uint8 **)&data, &size)) == + NULL) + return (PIX *)ERROR_PTR("tif not opened", procName, NULL); + + pagefound = FALSE; + pix = NULL; + for (i = 0; i < MAX_PAGES_IN_TIFF_FILE; i++) { + if (i == n) { + pagefound = TRUE; + if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) { + TIFFCleanup(tif); + return (PIX *)ERROR_PTR("pix not read", procName, NULL); + } + break; + } + if (TIFFReadDirectory(tif) == 0) break; + } - TIFFCleanup(tif); - return pix; + if (pagefound == FALSE) { + L_WARNING("tiff page %d not found", procName, i); + 
TIFFCleanup(tif); + return NULL; + } + + TIFFCleanup(tif); + return pix; } PIX * @@ -1262,7 +1246,6 @@ void compare(l_uint32 *cpu, l_uint32 *gpu,int size) } } printf("\nit matches\n"); - } //OpenCL implementation of pixReadFromTiffStream. @@ -1285,7 +1268,6 @@ PIXCMAP *cmap; if (!tif) return (PIX *)ERROR_PTR("tif not defined", procName, NULL); - TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps); TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, &spp); bpp = bps * spp; @@ -1308,16 +1290,15 @@ PIXCMAP *cmap; wpl = pixGetWpl(pix); bpl = 4 * wpl; - if (spp == 1) { if ((linebuf = (l_uint8 *)CALLOC(tiffbpl + 1, sizeof(l_uint8))) == NULL) return (PIX *)ERROR_PTR("calloc fail for linebuf", procName, NULL); for (i = 0 ; i < h ; i++) { if (TIFFReadScanline(tif, linebuf, i, 0) < 0) { - FREE(linebuf); - pixDestroy(&pix); - return (PIX *)ERROR_PTR("line read fail", procName, NULL); + FREE(linebuf); + pixDestroy(&pix); + return (PIX *)ERROR_PTR("line read fail", procName, NULL); } memcpy((char *)data, (char *)linebuf, tiffbpl); data += bpl; @@ -1325,30 +1306,29 @@ PIXCMAP *cmap; if (bps <= 8) pixEndianByteSwap(pix); else - pixEndianTwoByteSwap(pix); + pixEndianTwoByteSwap(pix); FREE(linebuf); - } - else { - if ((tiffdata = (l_uint32 *)CALLOC(w * h, sizeof(l_uint32))) == NULL) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("calloc fail for tiffdata", procName, NULL); - } - if (!TIFFReadRGBAImageOriented(tif, w, h, (uint32 *)tiffdata, - ORIENTATION_TOPLEFT, 0)) { - FREE(tiffdata); - pixDestroy(&pix); - return (PIX *)ERROR_PTR("failed to read tiffdata", procName, NULL); - } - line = pixGetData(pix); + } else { + if ((tiffdata = (l_uint32 *)CALLOC(w * h, sizeof(l_uint32))) == NULL) { + pixDestroy(&pix); + return (PIX *)ERROR_PTR("calloc fail for tiffdata", procName, NULL); + } + if (!TIFFReadRGBAImageOriented(tif, w, h, (uint32 *)tiffdata, + ORIENTATION_TOPLEFT, 0)) { + FREE(tiffdata); + pixDestroy(&pix); + return (PIX *)ERROR_PTR("failed to read tiffdata", procName, NULL); 
+ } + line = pixGetData(pix); - //Invoke the OpenCL kernel for pixReadFromTiff - l_uint32* output_gpu=pixReadFromTiffKernel(tiffdata,w,h,wpl,line); + // Invoke the OpenCL kernel for pixReadFromTiff + l_uint32 *output_gpu = pixReadFromTiffKernel(tiffdata, w, h, wpl, line); - pixSetData(pix, output_gpu); - // pix already has data allocated, it now points to output_gpu? - FREE(tiffdata); - FREE(line); - //FREE(output_gpu); + pixSetData(pix, output_gpu); + // pix already has data allocated, it now points to output_gpu? + FREE(tiffdata); + FREE(line); + // FREE(output_gpu); } if (getTiffStreamResolutionCl(tif, &xres, &yres) == 0) { @@ -1362,7 +1342,6 @@ PIXCMAP *cmap; pixSetInputFormat(pix, comptype); if (TIFFGetField(tif, TIFFTAG_COLORMAP, &redmap, &greenmap, &bluemap)) { - if ((cmap = pixcmapCreate(bps)) == NULL) { pixDestroy(&pix); return (PIX *)ERROR_PTR("cmap not made", procName, NULL); @@ -1372,22 +1351,19 @@ PIXCMAP *cmap; pixcmapAddColor(cmap, redmap[i] >> 8, greenmap[i] >> 8, bluemap[i] >> 8); pixSetColormap(pix, cmap); - } - else { - if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) { - - if (tiffcomp == COMPRESSION_CCITTFAX3 || - tiffcomp == COMPRESSION_CCITTFAX4 || - tiffcomp == COMPRESSION_CCITTRLE || - tiffcomp == COMPRESSION_CCITTRLEW) { - photometry = PHOTOMETRIC_MINISWHITE; - } - else - photometry = PHOTOMETRIC_MINISBLACK; - } - if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) || - (d == 8 && photometry == PHOTOMETRIC_MINISWHITE)) - pixInvert(pix, pix); + } else { + if (!TIFFGetField(tif, TIFFTAG_PHOTOMETRIC, &photometry)) { + if (tiffcomp == COMPRESSION_CCITTFAX3 || + tiffcomp == COMPRESSION_CCITTFAX4 || + tiffcomp == COMPRESSION_CCITTRLE || + tiffcomp == COMPRESSION_CCITTRLEW) { + photometry = PHOTOMETRIC_MINISWHITE; + } else + photometry = PHOTOMETRIC_MINISBLACK; + } + if ((d == 1 && photometry == PHOTOMETRIC_MINISBLACK) || + (d == 8 && photometry == PHOTOMETRIC_MINISWHITE)) + pixInvert(pix, pix); } if (TIFFGetField(tif, 
TIFFTAG_ORIENTATION, &orientation)) { @@ -1635,95 +1611,50 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) if (xp > 31 || xn > 31) { - //Generic case. - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor", &status ); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(xp), - (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(xn), - (const void *)&xn); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(wpl), - (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(h), - (const void *)&h); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); - - if (yp > 0 || yn > 0) - { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; + // Generic case. 
+ rEnv.mpkKernel = + clCreateKernel(rEnv.mpkProgram, "morphoDilateHor", &status); + + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), (const void *)&xn); + status = + clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), (const void *)&wpl); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), (const void *)&h); + status = + clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); + + if (yp > 0 || yn > 0) { + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; } } else if (xp > 0 || xn > 0 ) { - //Specific Horizontal pass kernel for half width < 32 - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor_32word", &status ); - isEven = (xp != xn); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(xp), - (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(wpl), - (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(h), - (const void *)&h); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(isEven), - (const void *)&isEven); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); - - if (yp > 0 || yn > 0) - { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; + // Specific Horizontal pass kernel for half width < 32 + rEnv.mpkKernel = + clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_32word", &status); + isEven = (xp != xn); + + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); + 
status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); + status = + clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), (const void *)&wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), + (const void *)&isEven); + status = + clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); + + if (yp > 0 || yn > 0) { + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; } } @@ -1766,253 +1697,153 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) NULL); } - return status; } //Morphology Erode operation. Invokes the relevant OpenCL kernels -cl_int -pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) -{ - - l_int32 xp, yp, xn, yn; - SEL* sel; - size_t globalThreads[2]; - size_t localThreads[2]; - cl_mem pixtemp; - cl_int status; - int gsize; - char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC); - l_uint32 rwmask, lwmask; - char isEven; - - sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); - - selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); - selDestroy(&sel); - OpenclDevice::SetKernelEnv( &rEnv ); - - if (hsize == 5 && vsize == 5 && isAsymmetric) - { - //Specific kernel for 5x5 - status = pixErodeCL_55(wpl, h); - return status; - } - - lwmask = lmask32[31 - (xn & 31)]; - rwmask = rmask32[31 - (xp & 31)]; - - //global and local work dimensions for Horizontal pass - gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; - localThreads[0] = GROUPSIZE_X; - localThreads[1] = GROUPSIZE_Y; - - //Horizontal Pass - if (xp > 31 || xn > 31 ) - { - //Generic case. 
- rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeHor", &status ); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(xp), - (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(xn), - (const void *)&xn); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(wpl), - (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(h), - (const void *)&h); - status = clSetKernelArg(rEnv.mpkKernel, - 6, - sizeof(isAsymmetric), - (const void *)&isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, - 7, - sizeof(rwmask), - (const void *)&rwmask); - status = clSetKernelArg(rEnv.mpkKernel, - 8, - sizeof(lwmask), - (const void *)&lwmask); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); - - if (yp > 0 || yn > 0) - { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } - } - else if (xp > 0 || xn > 0) - { - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeHor_32word", &status ); - isEven = (xp != xn); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(xp), - (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(wpl), - (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(h), - (const void *)&h); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(isAsymmetric), - (const void *)&isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, - 6, - sizeof(rwmask), - (const void *)&rwmask); - status = clSetKernelArg(rEnv.mpkKernel, - 7, - sizeof(lwmask), - (const void *)&lwmask); - status = 
clSetKernelArg(rEnv.mpkKernel, - 8, - sizeof(isEven), - (const void *)&isEven); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); +cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { + l_int32 xp, yp, xn, yn; + SEL *sel; + size_t globalThreads[2]; + size_t localThreads[2]; + cl_mem pixtemp; + cl_int status; + int gsize; + char isAsymmetric = (MORPH_BC == ASYMMETRIC_MORPH_BC); + l_uint32 rwmask, lwmask; + char isEven; + + sel = selCreateBrick(vsize, hsize, vsize / 2, hsize / 2, SEL_HIT); + + selFindMaxTranslations(sel, &xp, &yp, &xn, &yn); + selDestroy(&sel); + OpenclDevice::SetKernelEnv(&rEnv); + + if (hsize == 5 && vsize == 5 && isAsymmetric) { + // Specific kernel for 5x5 + status = pixErodeCL_55(wpl, h); + return status; + } - if (yp > 0 || yn > 0) - { - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - } + lwmask = lmask32[31 - (xn & 31)]; + rwmask = rmask32[31 - (xp & 31)]; + + // global and local work dimensions for Horizontal pass + gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; + globalThreads[0] = gsize; + gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; + globalThreads[1] = gsize; + localThreads[0] = GROUPSIZE_X; + localThreads[1] = GROUPSIZE_Y; + + // Horizontal Pass + if (xp > 31 || xn > 31) { + // Generic case. 
+ rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeHor", &status); + + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), (const void *)&xn); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), (const void *)&wpl); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(isAsymmetric), + (const void *)&isAsymmetric); + status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(rwmask), + (const void *)&rwmask); + status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(lwmask), + (const void *)&lwmask); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); + + if (yp > 0 || yn > 0) { + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; + } + } else if (xp > 0 || xn > 0) { + rEnv.mpkKernel = + clCreateKernel(rEnv.mpkProgram, "morphoErodeHor_32word", &status); + isEven = (xp != xn); + + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), (const void *)&wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), + (const void *)&isAsymmetric); + status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(rwmask), + (const void *)&rwmask); + status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(lwmask), + (const void *)&lwmask); + status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(isEven), + (const void *)&isEven); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, 
rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); + + if (yp > 0 || yn > 0) { + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; } + } - //Vertical Pass - if (yp > 0 || yn > 0) - { - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeVer", &status ); - - status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &pixsCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(yp), - (const void *)&yp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(wpl), - (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(h), - (const void *)&h); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(isAsymmetric), - (const void *)&isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, - 6, - sizeof(yn), - (const void *)&yn); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); - } + // Vertical Pass + if (yp > 0 || yn > 0) { + rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeVer", &status); + + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), (const void *)&yp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), (const void *)&wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), + (const void *)&isAsymmetric); + status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(yn), (const void *)&yn); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); + } - return status; + return status; } // OpenCL implementation of Morphology Dilate //Note: Assumes the source and dest 
opencl buffer are initialized. No check done -PIX* -OpenclDevice::pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy = false) -{ - l_uint32 wpl, h; +PIX *OpenclDevice::pixDilateBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, + l_int32 vsize, bool reqDataCopy = false) { + l_uint32 wpl, h; - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); - clStatus = pixDilateCL(hsize, vsize, wpl, h); + clStatus = pixDilateCL(hsize, vsize, wpl, h); - if (reqDataCopy) - { - pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ, false); - } + if (reqDataCopy) { + pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, + CL_MAP_READ, false); + } - return pixd; + return pixd; } // OpenCL implementation of Morphology Erode //Note: Assumes the source and dest opencl buffer are initialized. No check done -PIX* -OpenclDevice::pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize, bool reqDataCopy = false) -{ - l_uint32 wpl, h; +PIX *OpenclDevice::pixErodeBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, + l_int32 vsize, bool reqDataCopy = false) { + l_uint32 wpl, h; - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); - clStatus = pixErodeCL(hsize, vsize, wpl, h); + clStatus = pixErodeCL(hsize, vsize, wpl, h); - if (reqDataCopy) - { - pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ); - } + if (reqDataCopy) { + pixd = + mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ); + } - return pixd; + return pixd; } //Morphology Open operation. Invokes the relevant OpenCL kernels @@ -2055,50 +1886,40 @@ pixCloseCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) // OpenCL implementation of Morphology Close //Note: Assumes the source and dest opencl buffer are initialized. 
No check done -PIX* -OpenclDevice::pixCloseBrickCL(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize, - bool reqDataCopy = false) -{ - l_uint32 wpl, h; +PIX *OpenclDevice::pixCloseBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, + l_int32 vsize, bool reqDataCopy = false) { + l_uint32 wpl, h; - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); - clStatus = pixCloseCL(hsize, vsize, wpl, h); + clStatus = pixCloseCL(hsize, vsize, wpl, h); - if (reqDataCopy) - { - pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ); - } + if (reqDataCopy) { + pixd = + mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ); + } - return pixd; + return pixd; } // OpenCL implementation of Morphology Open //Note: Assumes the source and dest opencl buffer are initialized. No check done -PIX* -OpenclDevice::pixOpenBrickCL(PIX *pixd, - PIX *pixs, - l_int32 hsize, - l_int32 vsize, - bool reqDataCopy = false) -{ - l_uint32 wpl, h; +PIX *OpenclDevice::pixOpenBrickCL(PIX *pixd, PIX *pixs, l_int32 hsize, + l_int32 vsize, bool reqDataCopy = false) { + l_uint32 wpl, h; - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); - clStatus = pixOpenCL(hsize, vsize, wpl, h); + clStatus = pixOpenCL(hsize, vsize, wpl, h); - if (reqDataCopy) - { - pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ); - } + if (reqDataCopy) { + pixd = + mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ); + } - return pixd; + return pixd; } //pix OR operation: outbuffer = buffer1 | buffer2 @@ -2262,19 +2083,16 @@ pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_ // OpenCL implementation of Subtract pix //Note: Assumes the source and dest opencl buffer are initialized. 
No check done -PIX* -OpenclDevice::pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2, bool reqDataCopy = false) -{ - l_uint32 wpl, h; +PIX *OpenclDevice::pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2, + bool reqDataCopy = false) { + l_uint32 wpl, h; - PROCNAME("pixSubtractCL"); + PROCNAME("pixSubtractCL"); - if (!pixs1) - return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); - if (!pixs2) - return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); - if (pixGetDepth(pixs1) != pixGetDepth(pixs2)) - return (PIX *)ERROR_PTR("depths of pixs* unequal", procName, pixd); + if (!pixs1) return (PIX *)ERROR_PTR("pixs1 not defined", procName, pixd); + if (!pixs2) return (PIX *)ERROR_PTR("pixs2 not defined", procName, pixd); + if (pixGetDepth(pixs1) != pixGetDepth(pixs2)) + return (PIX *)ERROR_PTR("depths of pixs* unequal", procName, pixd); #if EQUAL_SIZE_WARNING if (!pixSizesEqual(pixs1, pixs2)) @@ -2297,200 +2115,207 @@ OpenclDevice::pixSubtractCL(PIX *pixd, PIX *pixs1, PIX *pixs2, bool reqDataCo // OpenCL implementation of Hollow pix //Note: Assumes the source and dest opencl buffer are initialized. 
No check done -PIX* -OpenclDevice::pixHollowCL(PIX *pixd, - PIX *pixs, - l_int32 close_hsize, - l_int32 close_vsize, - l_int32 open_hsize, - l_int32 open_vsize, - bool reqDataCopy = false) -{ - l_uint32 wpl, h; - cl_mem pixtemp; - - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); - - //First step : Close Morph operation: Dilate followed by Erode - clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h); - - //Store the output of close operation in an intermediate buffer - //this will be later used for pixsubtract - clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, sizeof(int) * wpl*h, 0, NULL, NULL); - - //Second step: Open Operation - Erode followed by Dilate - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; - - clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h); - - //Third step: Subtract : (Close - Open) - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixdCLIntermediate; - pixdCLIntermediate = pixtemp; - - clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer); - - if (reqDataCopy) - { - //Read back output data from OCL buffer to cpu - pixd = mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl*h, CL_MAP_READ); - } - return pixd; +PIX *OpenclDevice::pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize, + l_int32 close_vsize, l_int32 open_hsize, + l_int32 open_vsize, bool reqDataCopy = false) { + l_uint32 wpl, h; + cl_mem pixtemp; + + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); + + // First step : Close Morph operation: Dilate followed by Erode + clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h); + + // Store the output of close operation in an intermediate buffer + // this will be later used for pixsubtract + clStatus = + clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, + 0, sizeof(int) * wpl * h, 0, NULL, NULL); + + // Second step: Open Operation - Erode followed by Dilate + pixtemp = pixsCLBuffer; + pixsCLBuffer 
= pixdCLBuffer; + pixdCLBuffer = pixtemp; + + clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h); + + // Third step: Subtract : (Close - Open) + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixdCLIntermediate; + pixdCLIntermediate = pixtemp; + + clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer); + + if (reqDataCopy) { + // Read back output data from OCL buffer to cpu + pixd = + mapOutputCLBuffer(rEnv, pixdCLBuffer, pixd, pixs, wpl * h, CL_MAP_READ); + } + return pixd; } // OpenCL implementation of Get Lines from pix function //Note: Assumes the source and dest opencl buffer are initialized. No check done -void -OpenclDevice::pixGetLinesCL(PIX *pixd, - PIX *pixs, - PIX** pix_vline, - PIX** pix_hline, - PIX** pixClosed, - bool getpixClosed, - l_int32 close_hsize, l_int32 close_vsize, - l_int32 open_hsize, l_int32 open_vsize, - l_int32 line_hsize, l_int32 line_vsize) -{ - l_uint32 wpl, h; - cl_mem pixtemp; - - wpl = pixGetWpl(pixs); - h = pixGetHeight(pixs); - - //First step : Close Morph operation: Dilate followed by Erode - clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h); - - //Copy the Close output to CPU buffer - if (getpixClosed) - { - *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs, wpl*h, CL_MAP_READ, true, false); - } +void OpenclDevice::pixGetLinesCL(PIX *pixd, PIX *pixs, PIX **pix_vline, + PIX **pix_hline, PIX **pixClosed, + bool getpixClosed, l_int32 close_hsize, + l_int32 close_vsize, l_int32 open_hsize, + l_int32 open_vsize, l_int32 line_hsize, + l_int32 line_vsize) { + l_uint32 wpl, h; + cl_mem pixtemp; + + wpl = pixGetWpl(pixs); + h = pixGetHeight(pixs); + + // First step : Close Morph operation: Dilate followed by Erode + clStatus = pixCloseCL(close_hsize, close_vsize, wpl, h); + + // Copy the Close output to CPU buffer + if (getpixClosed) { + *pixClosed = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pixClosed, pixs, + wpl * h, CL_MAP_READ, true, false); + } - //Store the output of 
close operation in an intermediate buffer - //this will be later used for pixsubtract - clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, sizeof(int) * wpl*h, 0, NULL, NULL); + // Store the output of close operation in an intermediate buffer + // this will be later used for pixsubtract + clStatus = + clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, + 0, sizeof(int) * wpl * h, 0, NULL, NULL); - //Second step: Open Operation - Erode followed by Dilate - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; + // Second step: Open Operation - Erode followed by Dilate + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; - clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h); + clStatus = pixOpenCL(open_hsize, open_vsize, wpl, h); - //Third step: Subtract : (Close - Open) - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixdCLIntermediate; - pixdCLIntermediate = pixtemp; + // Third step: Subtract : (Close - Open) + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixdCLIntermediate; + pixdCLIntermediate = pixtemp; - clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer); + clStatus = pixSubtractCL_work(wpl, h, pixdCLBuffer, pixsCLBuffer); - //Store the output of Hollow operation in an intermediate buffer - //this will be later used - clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, 0, sizeof(int) * wpl*h, 0, NULL, NULL); + // Store the output of Hollow operation in an intermediate buffer + // this will be later used + clStatus = + clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixdCLBuffer, pixdCLIntermediate, 0, + 0, sizeof(int) * wpl * h, 0, NULL, NULL); - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLBuffer; - pixdCLBuffer = pixtemp; + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLBuffer; + pixdCLBuffer = pixtemp; - //Fourth step: Get vertical line - 
//pixOpenBrick(NULL, pix_hollow, 1, min_line_length); - clStatus = pixOpenCL(1, line_vsize, wpl, h); + // Fourth step: Get vertical line + // pixOpenBrick(NULL, pix_hollow, 1, min_line_length); + clStatus = pixOpenCL(1, line_vsize, wpl, h); - //Copy the vertical line output to CPU buffer - *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl*h, CL_MAP_READ, true, false); + // Copy the vertical line output to CPU buffer + *pix_vline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_vline, pixs, wpl * h, + CL_MAP_READ, true, false); - pixtemp = pixsCLBuffer; - pixsCLBuffer = pixdCLIntermediate; - pixdCLIntermediate = pixtemp; + pixtemp = pixsCLBuffer; + pixsCLBuffer = pixdCLIntermediate; + pixdCLIntermediate = pixtemp; - //Fifth step: Get horizontal line - //pixOpenBrick(NULL, pix_hollow, min_line_length, 1); - clStatus = pixOpenCL(line_hsize, 1, wpl, h); + // Fifth step: Get horizontal line + // pixOpenBrick(NULL, pix_hollow, min_line_length, 1); + clStatus = pixOpenCL(line_hsize, 1, wpl, h); - //Copy the horizontal line output to CPU buffer - *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl*h, CL_MAP_READ, true, true); + // Copy the horizontal line output to CPU buffer + *pix_hline = mapOutputCLBuffer(rEnv, pixdCLBuffer, *pix_hline, pixs, wpl * h, + CL_MAP_READ, true, true); - return; + return; } - /************************************************************************* * HistogramRect * Otsu Thresholding Operations * histogramAllChannels is laid out as all channel 0, then all channel 1... 
* only supports 1 or 4 channels (bytes_per_pixel) ************************************************************************/ -int OpenclDevice::HistogramRectOCL( - const unsigned char* imageData, - int bytes_per_pixel, - int bytes_per_line, - int left, // always 0 - int top, // always 0 - int width, - int height, - int kHistogramSize, - int* histogramAllChannels) -{ -PERF_COUNT_START("HistogramRectOCL") - cl_int clStatus; - int retVal= 0; - KernelEnv histKern; - SetKernelEnv( &histKern ); - KernelEnv histRedKern; - SetKernelEnv( &histRedKern ); - /* map imagedata to device as read only */ - // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be coherent which we don't need. - // faster option would be to allocate initial image buffer - // using a garlic bus memory type - cl_mem imageBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width*height*bytes_per_pixel*sizeof(char), (void *)imageData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer"); - - /* setup work group size parameters */ - int block_size = 256; - cl_uint numCUs; - clStatus = clGetDeviceInfo( gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, NULL); - CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer"); - - int requestedOccupancy = 10; - int numWorkGroups = numCUs * requestedOccupancy; - int numThreads = block_size*numWorkGroups; - size_t local_work_size[] = {static_cast(block_size)}; - size_t global_work_size[] = {static_cast(numThreads)}; - size_t red_global_work_size[] = {static_cast(block_size*kHistogramSize*bytes_per_pixel)}; - - /* map histogramAllChannels as write only */ - int numBins = kHistogramSize*bytes_per_pixel*numWorkGroups; - - cl_mem histogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, kHistogramSize*bytes_per_pixel*sizeof(int), (void *)histogramAllChannels, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer histogramBuffer"); - - /* 
intermediate histogram buffer */ - int histRed = 256; - int tmpHistogramBins = kHistogramSize*bytes_per_pixel*histRed; - - cl_mem tmpHistogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE, tmpHistogramBins*sizeof(cl_uint), NULL, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer tmpHistogramBuffer"); - - /* atomic sync buffer */ - int *zeroBuffer = new int[1]; - zeroBuffer[0] = 0; - cl_mem atomicSyncBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(cl_int), (void *)zeroBuffer, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer atomicSyncBuffer"); - delete[] zeroBuffer; - //Create kernel objects based on bytes_per_pixel - if (bytes_per_pixel == 1) - { - histKern.mpkKernel = clCreateKernel( histKern.mpkProgram, "kernel_HistogramRectOneChannel", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectOneChannel"); - - histRedKern.mpkKernel = clCreateKernel( histRedKern.mpkProgram, "kernel_HistogramRectOneChannelReduction", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectOneChannelReduction"); - } else { +int OpenclDevice::HistogramRectOCL(const unsigned char *imageData, + int bytes_per_pixel, int bytes_per_line, + int left, // always 0 + int top, // always 0 + int width, int height, int kHistogramSize, + int *histogramAllChannels) { + PERF_COUNT_START("HistogramRectOCL") + cl_int clStatus; + int retVal = 0; + KernelEnv histKern; + SetKernelEnv(&histKern); + KernelEnv histRedKern; + SetKernelEnv(&histRedKern); + /* map imagedata to device as read only */ + // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be + // coherent which we don't need. 
+ // faster option would be to allocate initial image buffer + // using a garlic bus memory type + cl_mem imageBuffer = clCreateBuffer( + histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + width * height * bytes_per_pixel * sizeof(char), (void *)imageData, + &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); + + /* setup work group size parameters */ + int block_size = 256; + cl_uint numCUs; + clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(numCUs), &numCUs, NULL); + CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); + + int requestedOccupancy = 10; + int numWorkGroups = numCUs * requestedOccupancy; + int numThreads = block_size * numWorkGroups; + size_t local_work_size[] = {static_cast(block_size)}; + size_t global_work_size[] = {static_cast(numThreads)}; + size_t red_global_work_size[] = { + static_cast(block_size * kHistogramSize * bytes_per_pixel)}; + + /* map histogramAllChannels as write only */ + int numBins = kHistogramSize * bytes_per_pixel * numWorkGroups; + + cl_mem histogramBuffer = clCreateBuffer( + histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + kHistogramSize * bytes_per_pixel * sizeof(int), + (void *)histogramAllChannels, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer histogramBuffer"); + + /* intermediate histogram buffer */ + int histRed = 256; + int tmpHistogramBins = kHistogramSize * bytes_per_pixel * histRed; + + cl_mem tmpHistogramBuffer = + clCreateBuffer(histKern.mpkContext, CL_MEM_READ_WRITE, + tmpHistogramBins * sizeof(cl_uint), NULL, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer tmpHistogramBuffer"); + + /* atomic sync buffer */ + int *zeroBuffer = new int[1]; + zeroBuffer[0] = 0; + cl_mem atomicSyncBuffer = clCreateBuffer( + histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + sizeof(cl_int), (void *)zeroBuffer, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer atomicSyncBuffer"); + delete[] zeroBuffer; + // Create kernel objects 
based on bytes_per_pixel + if (bytes_per_pixel == 1) { + histKern.mpkKernel = clCreateKernel( + histKern.mpkProgram, "kernel_HistogramRectOneChannel", &clStatus); + CHECK_OPENCL(clStatus, "clCreateKernel kernel_HistogramRectOneChannel"); + + histRedKern.mpkKernel = + clCreateKernel(histRedKern.mpkProgram, + "kernel_HistogramRectOneChannelReduction", &clStatus); + CHECK_OPENCL(clStatus, + "clCreateKernel kernel_HistogramRectOneChannelReduction"); + } else { histKern.mpkKernel = clCreateKernel( histKern.mpkProgram, "kernel_HistogramRectAllChannels", &clStatus ); CHECK_OPENCL( clStatus, "clCreateKernel kernel_HistogramRectAllChannels"); @@ -2534,10 +2359,9 @@ PERF_COUNT_SUB("before") 0, NULL, NULL ); CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels" ); clFinish( histKern.mpkCmdQueue ); - if(clStatus !=0) - { - retVal = -1; - } + if (clStatus != 0) { + retVal = -1; + } /* launch histogram */ clStatus = clEnqueueNDRangeKernel( histRedKern.mpkCmdQueue, @@ -2546,27 +2370,24 @@ PERF_COUNT_SUB("before") 0, NULL, NULL ); CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction" ); clFinish( histRedKern.mpkCmdQueue ); - if(clStatus !=0) - { - retVal = -1; - } -PERF_COUNT_SUB("redKernel") + if (clStatus != 0) { + retVal = -1; + } + PERF_COUNT_SUB("redKernel") /* map results back from gpu */ ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, CL_MAP_READ, 0, kHistogramSize*bytes_per_pixel*sizeof(int), 0, NULL, NULL, &clStatus); CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer"); - if(clStatus !=0) - { - retVal = -1; - } + if (clStatus != 0) { + retVal = -1; + } clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, NULL, NULL); clReleaseMemObject(histogramBuffer); clReleaseMemObject(imageBuffer); PERF_COUNT_SUB("after") PERF_COUNT_END - return retVal; - +return retVal; } /************************************************************************* @@ -2574,111 
+2395,122 @@ PERF_COUNT_END * from the class, using thresholds/hi_values to the output IMAGE. * only supports 1 or 4 channels ************************************************************************/ -int OpenclDevice::ThresholdRectToPixOCL( - const unsigned char* imageData, - int bytes_per_pixel, - int bytes_per_line, - const int* thresholds, - const int* hi_values, - Pix** pix, - int height, - int width, - int top, - int left) { -PERF_COUNT_START("ThresholdRectToPixOCL") - int retVal =0; - /* create pix result buffer */ - *pix = pixCreate(width, height, 1); - uinT32* pixData = pixGetData(*pix); - int wpl = pixGetWpl(*pix); - int pixSize = wpl*height*sizeof(uinT32); // number of pixels - - cl_int clStatus; - KernelEnv rEnv; - SetKernelEnv( &rEnv ); - - /* setup work group size parameters */ - int block_size = 256; - cl_uint numCUs = 6; - clStatus = clGetDeviceInfo( gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(numCUs), &numCUs, NULL); - CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer"); - - int requestedOccupancy = 10; - int numWorkGroups = numCUs * requestedOccupancy; - int numThreads = block_size*numWorkGroups; - size_t local_work_size[] = {(size_t) block_size}; - size_t global_work_size[] = {(size_t) numThreads}; - - /* map imagedata to device as read only */ - // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be coherent which we don't need. 
- // faster option would be to allocate initial image buffer - // using a garlic bus memory type - cl_mem imageBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width*height*bytes_per_pixel*sizeof(char), (void *)imageData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer imageBuffer"); - - /* map pix as write only */ - pixThBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, pixSize, (void *)pixData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer pix"); - - /* map thresholds and hi_values */ - cl_mem thresholdsBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bytes_per_pixel*sizeof(int), (void *)thresholds, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer thresholdBuffer"); - cl_mem hiValuesBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, bytes_per_pixel*sizeof(int), (void *)hi_values, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer hiValuesBuffer"); - - /* compile kernel */ - if (bytes_per_pixel == 4) { - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "kernel_ThresholdRectToPix", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_ThresholdRectToPix"); - } else { - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "kernel_ThresholdRectToPix_OneChan", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_ThresholdRectToPix_OneChan"); - } +int OpenclDevice::ThresholdRectToPixOCL(const unsigned char *imageData, + int bytes_per_pixel, int bytes_per_line, + const int *thresholds, + const int *hi_values, Pix **pix, + int height, int width, int top, + int left) { + PERF_COUNT_START("ThresholdRectToPixOCL") + int retVal = 0; + /* create pix result buffer */ + *pix = pixCreate(width, height, 1); + uinT32 *pixData = pixGetData(*pix); + int wpl = pixGetWpl(*pix); + int pixSize = wpl * height * sizeof(uinT32); // number of pixels + + cl_int clStatus; + KernelEnv rEnv; + SetKernelEnv(&rEnv); + + /* setup 
work group size parameters */ + int block_size = 256; + cl_uint numCUs = 6; + clStatus = clGetDeviceInfo(gpuEnv.mpDevID, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(numCUs), &numCUs, NULL); + CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); + + int requestedOccupancy = 10; + int numWorkGroups = numCUs * requestedOccupancy; + int numThreads = block_size * numWorkGroups; + size_t local_work_size[] = {(size_t)block_size}; + size_t global_work_size[] = {(size_t)numThreads}; + + /* map imagedata to device as read only */ + // USE_HOST_PTR uses onion+ bus which is slowest option; also happens to be + // coherent which we don't need. + // faster option would be to allocate initial image buffer + // using a garlic bus memory type + cl_mem imageBuffer = + clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + width * height * bytes_per_pixel * sizeof(char), + (void *)imageData, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); + + /* map pix as write only */ + pixThBuffer = + clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + pixSize, (void *)pixData, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer pix"); + + /* map thresholds and hi_values */ + cl_mem thresholdsBuffer = clCreateBuffer( + rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + bytes_per_pixel * sizeof(int), (void *)thresholds, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer thresholdBuffer"); + cl_mem hiValuesBuffer = clCreateBuffer( + rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + bytes_per_pixel * sizeof(int), (void *)hi_values, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer hiValuesBuffer"); + + /* compile kernel */ + if (bytes_per_pixel == 4) { + rEnv.mpkKernel = + clCreateKernel(rEnv.mpkProgram, "kernel_ThresholdRectToPix", &clStatus); + CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix"); + } else { + rEnv.mpkKernel = clCreateKernel( + rEnv.mpkProgram, "kernel_ThresholdRectToPix_OneChan", 
&clStatus); + CHECK_OPENCL(clStatus, "clCreateKernel kernel_ThresholdRectToPix_OneChan"); + } - /* set kernel arguments */ - clStatus = clSetKernelArg( rEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&imageBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer"); - cl_uint numPixels = width*height; - clStatus = clSetKernelArg( rEnv.mpkKernel, 1, sizeof(int), (void *)&height ); - CHECK_OPENCL( clStatus, "clSetKernelArg height" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 2, sizeof(int), (void *)&width ); - CHECK_OPENCL( clStatus, "clSetKernelArg width" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 3, sizeof(int), (void *)&wpl ); - CHECK_OPENCL( clStatus, "clSetKernelArg wpl" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 4, sizeof(cl_mem), (void *)&thresholdsBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg thresholdsBuffer" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 5, sizeof(cl_mem), (void *)&hiValuesBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg hiValuesBuffer" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 6, sizeof(cl_mem), (void *)&pixThBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg pixThBuffer"); - - /* launch kernel & wait */ -PERF_COUNT_SUB("before") - clStatus = clEnqueueNDRangeKernel( - rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 1, NULL, global_work_size, local_work_size, - 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix" ); - clFinish( rEnv.mpkCmdQueue ); -PERF_COUNT_SUB("kernel") - if(clStatus !=0) - { - printf("Setting return value to -1\n"); - retVal = -1; - } - /* map results back from gpu */ - void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, pixSize, 0, NULL, NULL, &clStatus); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL, NULL); + /* set kernel arguments */ + clStatus = + clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&imageBuffer); + 
CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); + cl_uint numPixels = width * height; + clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(int), (void *)&height); + CHECK_OPENCL(clStatus, "clSetKernelArg height"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(int), (void *)&width); + CHECK_OPENCL(clStatus, "clSetKernelArg width"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(int), (void *)&wpl); + CHECK_OPENCL(clStatus, "clSetKernelArg wpl"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), + (void *)&thresholdsBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg thresholdsBuffer"); + clStatus = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(cl_mem), + (void *)&hiValuesBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg hiValuesBuffer"); + clStatus = + clSetKernelArg(rEnv.mpkKernel, 6, sizeof(cl_mem), (void *)&pixThBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg pixThBuffer"); + + /* launch kernel & wait */ + PERF_COUNT_SUB("before") + clStatus = + clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 1, NULL, + global_work_size, local_work_size, 0, NULL, NULL); + CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix"); + clFinish(rEnv.mpkCmdQueue); + PERF_COUNT_SUB("kernel") + if (clStatus != 0) { + printf("Setting return value to -1\n"); + retVal = -1; + } + /* map results back from gpu */ + void *ptr = + clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, + pixSize, 0, NULL, NULL, &clStatus); + CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer"); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL, NULL); - clReleaseMemObject(imageBuffer); - clReleaseMemObject(thresholdsBuffer); - clReleaseMemObject(hiValuesBuffer); + clReleaseMemObject(imageBuffer); + clReleaseMemObject(thresholdsBuffer); + clReleaseMemObject(hiValuesBuffer); -PERF_COUNT_SUB("after") -PERF_COUNT_END -return retVal; + PERF_COUNT_SUB("after") + PERF_COUNT_END + return retVal; } @@ -2755,7 +2587,6 
@@ void populateTessScoreEvaluationInputData( TessScoreEvaluationInputData *input ) float fractionBlack = 0.1; // how much of the image should be blackened int numSpots = (height*width)*fractionBlack/(maxLineWidth*maxLineWidth/2/2); for (int i = 0; i < numSpots; i++) { - int lineWidth = rand()%maxLineWidth; int col = lineWidth + rand()%(width-2*lineWidth); int row = lineWidth + rand()%(height-2*lineWidth); @@ -2786,15 +2617,14 @@ typedef struct _TessDeviceScore { *****************************************************************************/ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) { - double time = 0; #if ON_WINDOWS LARGE_INTEGER freq, time_funct_start, time_funct_end; QueryPerformanceFrequency(&freq); #elif ON_APPLE - mach_timebase_info_data_t info = { 0, 0 }; + mach_timebase_info_data_t info = {0, 0}; mach_timebase_info(&info); - long long start,stop; + long long start, stop; #else timespec time_funct_start, time_funct_end; #endif @@ -2805,8 +2635,8 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif @@ -2817,9 +2647,9 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - 
time_funct_start.tv_nsec)/1000000000.0; @@ -2828,8 +2658,8 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu } else { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif @@ -2842,7 +2672,6 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu int idx = 0; for (i = 0; i < input.height ; i++) { for (j = 0; j < input.width; j++) { - l_uint32 tiffword = tiffdata[i * input.width + j]; l_int32 rval = ((tiffword) & 0xff); l_int32 gval = (((tiffword) >> 8) & 0xff); @@ -2855,9 +2684,9 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; @@ -2872,15 +2701,14 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu } double histogramRectMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) { - double time; #if ON_WINDOWS LARGE_INTEGER freq, time_funct_start, time_funct_end; QueryPerformanceFrequency(&freq); -#elif ON_APPLE - mach_timebase_info_data_t info = { 0, 0 }; +#elif ON_APPLE + mach_timebase_info_data_t info = {0, 0}; mach_timebase_info(&info); - long long start,stop; + long long start, stop; #else timespec time_funct_start, time_funct_end; #endif @@ -2892,58 +2720,56 @@ double histogramRectMicroBench( 
GPUEnv *env, TessScoreEvaluationInputData input, int kHistogramSize = 256; int bytes_per_line = input.width*input.numChannels; int *histogramAllChannels = new int[kHistogramSize*input.numChannels]; - int retVal= 0; + int retVal = 0; // function call if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif OpenclDevice::gpuEnv = *env; int wpl = pixGetWpl(input.pix); - retVal= OpenclDevice::HistogramRectOCL(input.imageData, input.numChannels, bytes_per_line, top, left, input.width, input.height, kHistogramSize, histogramAllChannels); + retVal = OpenclDevice::HistogramRectOCL( + input.imageData, input.numChannels, bytes_per_line, top, left, + input.width, input.height, kHistogramSize, histogramAllChannels); #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - if(retVal ==0) - { - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; - } - else - { - time= FLT_MAX; - } +#elif ON_APPLE + stop = mach_absolute_time(); + if (retVal == 0) { + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + } else { + time = FLT_MAX; + } #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; #endif } else { - int *histogram = new int[kHistogramSize]; #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif for (int ch = 0; ch < input.numChannels; ++ch) { - tesseract::HistogramRect(input.pix, input.numChannels, - left, top, 
input.width, input.height, histogram); + tesseract::HistogramRect(input.pix, input.numChannels, left, top, + input.width, input.height, histogram); } #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; @@ -2995,16 +2821,15 @@ void ThresholdRectToPix_Native(const unsigned char* imagedata, } double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData input, ds_device_type type ) { - double time; - int retVal =0; + int retVal = 0; #if ON_WINDOWS LARGE_INTEGER freq, time_funct_start, time_funct_end; QueryPerformanceFrequency(&freq); -#elif ON_APPLE - mach_timebase_info_data_t info = { 0, 0 }; +#elif ON_APPLE + mach_timebase_info_data_t info = {0, 0}; mach_timebase_info(&info); - long long start,stop; + long long start, stop; #else timespec time_funct_start, time_funct_end; #endif @@ -3030,29 +2855,29 @@ double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData i if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif OpenclDevice::gpuEnv = *env; int wpl = pixGetWpl(input.pix); - retVal= OpenclDevice::ThresholdRectToPixOCL(input.imageData, input.numChannels, bytes_per_line, thresholds, hi_values, &input.pix, input.height, input.width, top, left); + retVal = OpenclDevice::ThresholdRectToPixOCL( + input.imageData, input.numChannels, 
bytes_per_line, thresholds, + hi_values, &input.pix, input.height, input.width, top, left); #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - if(retVal ==0) - { - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9;; - } - else - { - time= FLT_MAX; - } +#elif ON_APPLE + stop = mach_absolute_time(); + if (retVal == 0) { + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; + ; + } else { + time = FLT_MAX; + } #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); @@ -3065,8 +2890,8 @@ double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData i thresholder.SetImage( input.pix ); #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif @@ -3076,9 +2901,9 @@ double thresholdRectToPixMicroBench( GPUEnv *env, TessScoreEvaluationInputData i #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; @@ -3097,10 +2922,10 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in #if ON_WINDOWS LARGE_INTEGER freq, time_funct_start, time_funct_end; QueryPerformanceFrequency(&freq); -#elif ON_APPLE - mach_timebase_info_data_t info = { 0, 0 }; +#elif ON_APPLE + mach_timebase_info_data_t info = {0, 0}; 
mach_timebase_info(&info); - long long start,stop; + long long start, stop; #else timespec time_funct_start, time_funct_end; #endif @@ -3118,8 +2943,8 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in if (type == DS_DEVICE_OPENCL_DEVICE) { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif @@ -3134,9 +2959,9 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; @@ -3144,8 +2969,8 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in } else { #if ON_WINDOWS QueryPerformanceCounter(&time_funct_start); -#elif ON_APPLE - start = mach_absolute_time(); +#elif ON_APPLE + start = mach_absolute_time(); #else clock_gettime( CLOCK_MONOTONIC, &time_funct_start ); #endif @@ -3163,9 +2988,9 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); -#elif ON_APPLE - stop = mach_absolute_time(); - time = ((stop - start) * (double) info.numer / info.denom) / 1.0E9; +#elif ON_APPLE + stop = mach_absolute_time(); + time = ((stop - start) * (double)info.numer / info.denom) / 1.0E9; #else clock_gettime( 
CLOCK_MONOTONIC, &time_funct_end ); time = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; @@ -3183,7 +3008,6 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in #include "stdlib.h" - // encode score object as byte string ds_status serializeScore( ds_device* device, void **serializedScore, unsigned int* serializedScoreSize ) { *serializedScoreSize = sizeof(TessDeviceScore); @@ -3200,14 +3024,13 @@ ds_status deserializeScore( ds_device* device, const unsigned char* serializedSc return DS_SUCCESS; } -ds_status releaseScore( void* score ) { +ds_status releaseScore(void *score) { delete (TessDeviceScore *)score; return DS_SUCCESS; } // evaluate devices ds_status evaluateScoreForDevice( ds_device *device, void *inputData) { - // overwrite statuc gpuEnv w/ current device // so native opencl calls can be used; they use static gpuEnv printf("\n[DS] Device: \"%s\" (%s) evaluation...\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ? 
"OpenCL" : "Native" ); @@ -3243,14 +3066,12 @@ ds_status evaluateScoreForDevice( ds_device *device, void *inputData) { float composeRGBPixelWeight = 1.2f; float histogramRectWeight = 2.4f; float thresholdRectToPixWeight = 4.5f; - float getLineMasksMorphWeight = 5.0f; - - float weightedTime = - composeRGBPixelWeight * composeRGBPixelTime + - histogramRectWeight * histogramRectTime + - thresholdRectToPixWeight * thresholdRectToPixTime + - getLineMasksMorphWeight * getLineMasksMorphTime - ; + float getLineMasksMorphWeight = 5.0f; + + float weightedTime = composeRGBPixelWeight * composeRGBPixelTime + + histogramRectWeight * histogramRectTime + + thresholdRectToPixWeight * thresholdRectToPixTime + + getLineMasksMorphWeight * getLineMasksMorphTime; device->score = (void *)new TessDeviceScore; ((TessDeviceScore *)device->score)->time = weightedTime; @@ -3266,83 +3087,105 @@ ds_status evaluateScoreForDevice( ds_device *device, void *inputData) { // initial call to select device ds_device OpenclDevice::getDeviceSelection( ) { if (!deviceIsSelected) { -PERF_COUNT_START("getDeviceSelection") - // check if opencl is available at runtime - if( 1 == LoadOpencl() ) { - // opencl is available -//PERF_COUNT_SUB("LoadOpencl") - // setup devices - ds_status status; - ds_profile *profile; - status = initDSProfile( &profile, "v0.1" ); -PERF_COUNT_SUB("initDSProfile") - // try reading scores from file - const char *fileName = "tesseract_opencl_profile_devices.dat"; - status = readProfileFromFile( profile, deserializeScore, fileName); - if (status != DS_SUCCESS) { - // need to run evaluation - printf("[DS] Profile file not available (%s); performing profiling.\n", fileName); - - // create input data - TessScoreEvaluationInputData input; - populateTessScoreEvaluationInputData( &input ); -//PERF_COUNT_SUB("populateTessScoreEvaluationInputData") - // perform evaluations - unsigned int numUpdates; - status = profileDevices( profile, DS_EVALUATE_ALL, evaluateScoreForDevice, (void *)&input, 
&numUpdates ); -PERF_COUNT_SUB("profileDevices") - // write scores to file - if ( status == DS_SUCCESS ) { - status = writeProfileToFile( profile, serializeScore, fileName); -PERF_COUNT_SUB("writeProfileToFile") - if ( status == DS_SUCCESS ) { - printf("[DS] Scores written to file (%s).\n", fileName); + PERF_COUNT_START("getDeviceSelection") + // check if opencl is available at runtime + if (1 == LoadOpencl()) { + // opencl is available + // PERF_COUNT_SUB("LoadOpencl") + // setup devices + ds_status status; + ds_profile *profile; + status = initDSProfile(&profile, "v0.1"); + PERF_COUNT_SUB("initDSProfile") + // try reading scores from file + const char *fileName = "tesseract_opencl_profile_devices.dat"; + status = readProfileFromFile(profile, deserializeScore, fileName); + if (status != DS_SUCCESS) { + // need to run evaluation + printf("[DS] Profile file not available (%s); performing profiling.\n", + fileName); + + // create input data + TessScoreEvaluationInputData input; + populateTessScoreEvaluationInputData(&input); + // PERF_COUNT_SUB("populateTessScoreEvaluationInputData") + // perform evaluations + unsigned int numUpdates; + status = + profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, + (void *)&input, &numUpdates); + PERF_COUNT_SUB("profileDevices") + // write scores to file + if (status == DS_SUCCESS) { + status = writeProfileToFile(profile, serializeScore, fileName); + PERF_COUNT_SUB("writeProfileToFile") + if (status == DS_SUCCESS) { + printf("[DS] Scores written to file (%s).\n", fileName); + } else { + printf( + "[DS] Error saving scores to file (%s); scores not written to " + "file.\n", + fileName); + } } else { - printf("[DS] Error saving scores to file (%s); scores not written to file.\n", fileName); + printf( + "[DS] Unable to evaluate performance; scores not written to " + "file.\n"); } } else { - printf("[DS] Unable to evaluate performance; scores not written to file.\n"); + PERF_COUNT_SUB("readProfileFromFile") + printf("[DS] 
Profile read from file (%s).\n", fileName); } - } else { - -PERF_COUNT_SUB("readProfileFromFile") - printf("[DS] Profile read from file (%s).\n", fileName); - } - - // we now have device scores either from file or evaluation - // select fastest using custom Tesseract selection algorithm - float bestTime = FLT_MAX; // begin search with worst possible time - int bestDeviceIdx = -1; - for (int d = 0; d < profile->numDevices; d++) { - ds_device device = profile->devices[d]; - TessDeviceScore score = *(TessDeviceScore *)device.score; - float time = score.time; - printf("[DS] Device[%i] %i:%s score is %f\n", d+1, device.type, device.oclDeviceName, time); - if (time < bestTime) { - bestTime = time; + // we now have device scores either from file or evaluation + // select fastest using custom Tesseract selection algorithm + float bestTime = FLT_MAX; // begin search with worst possible time + int bestDeviceIdx = -1; + for (int d = 0; d < profile->numDevices; d++) { + ds_device device = profile->devices[d]; + TessDeviceScore score = *(TessDeviceScore *)device.score; + + float time = score.time; + printf("[DS] Device[%i] %i:%s score is %f\n", d + 1, device.type, + device.oclDeviceName, time); + if (time < bestTime) { + bestTime = time; bestDeviceIdx = d; + } } - } - printf("[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); - // cleanup - // TODO: call destructor for profile object? + printf("[DS] Selected Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1, + profile->devices[bestDeviceIdx].oclDeviceName, + profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE + ? "OpenCL" + : "Native"); + // cleanup + // TODO: call destructor for profile object? 
bool overridden = false; char *overrideDeviceStr = getenv("TESSERACT_OPENCL_DEVICE"); if (overrideDeviceStr != NULL) { int overrideDeviceIdx = atoi(overrideDeviceStr); - if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices ) { - printf("[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, %i)\n", overrideDeviceStr, overrideDeviceIdx); + if (overrideDeviceIdx > 0 && overrideDeviceIdx <= profile->numDevices) { + printf( + "[DS] Overriding Device Selection (TESSERACT_OPENCL_DEVICE=%s, " + "%i)\n", + overrideDeviceStr, overrideDeviceIdx); bestDeviceIdx = overrideDeviceIdx - 1; overridden = true; } else { - printf("[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are valid devices).\n", overrideDeviceStr, profile->numDevices); + printf( + "[DS] Ignoring invalid TESSERACT_OPENCL_DEVICE=%s ([1,%i] are " + "valid devices).\n", + overrideDeviceStr, profile->numDevices); } } if (overridden) { - printf("[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx+1, profile->devices[bestDeviceIdx].oclDeviceName, profile->devices[bestDeviceIdx].type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native"); + printf("[DS] Overridden Device[%i]: \"%s\" (%s)\n", bestDeviceIdx + 1, + profile->devices[bestDeviceIdx].oclDeviceName, + profile->devices[bestDeviceIdx].type == DS_DEVICE_OPENCL_DEVICE + ? "OpenCL" + : "Native"); } selectedDevice = profile->devices[bestDeviceIdx]; // cleanup @@ -3357,10 +3200,10 @@ PERF_COUNT_SUB("readProfileFromFile") selectedDevice.oclDriverVersion = NULL; } deviceIsSelected = true; -PERF_COUNT_SUB("select from Profile") -PERF_COUNT_END + PERF_COUNT_SUB("select from Profile") + PERF_COUNT_END } -//PERF_COUNT_END + // PERF_COUNT_END return selectedDevice; } @@ -3375,8 +3218,6 @@ bool OpenclDevice::selectedDeviceIsNativeCPU() { return (device.type == DS_DEVICE_NATIVE_CPU); } - - /*! 
* pixConvertRGBToGray() from leptonica, converted to opencl kernel * @@ -3388,111 +3229,113 @@ bool OpenclDevice::selectedDeviceIsNativeCPU() { * Notes: * (1) Use a weighted average of the RGB values. */ -#define SET_DATA_BYTE( pdata, n, val ) (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val)) +#define SET_DATA_BYTE(pdata, n, val) \ + (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val)) -Pix * OpenclDevice::pixConvertRGBToGrayOCL( - Pix *srcPix, // 32-bit source - float rwt, - float gwt, - float bwt ) -{ -PERF_COUNT_START("pixConvertRGBToGrayOCL") - Pix *dstPix; // 8-bit destination +Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix, // 32-bit source + float rwt, float gwt, float bwt) { + PERF_COUNT_START("pixConvertRGBToGrayOCL") + Pix *dstPix; // 8-bit destination - if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) return NULL; + if (rwt < 0.0 || gwt < 0.0 || bwt < 0.0) return NULL; - if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) { - // magic numbers from leptonica - rwt = 0.3; - gwt = 0.5; - bwt = 0.2; - } - // normalize - float sum = rwt + gwt + bwt; - rwt /= sum; - gwt /= sum; - bwt /= sum; - - // source pix - int w, h; - pixGetDimensions(srcPix, &w, &h, NULL); - //printf("Image is %i x %i\n", w, h); - unsigned int *srcData = pixGetData(srcPix); - int srcWPL = pixGetWpl(srcPix); - int srcSize = srcWPL * h * sizeof(unsigned int); - - // destination pix - if ((dstPix = pixCreate(w, h, 8)) == NULL) - return NULL; - pixCopyResolution(dstPix, srcPix); - unsigned int *dstData = pixGetData(dstPix); - int dstWPL = pixGetWpl(dstPix); - int dstWords = dstWPL * h; - int dstSize = dstWords * sizeof(unsigned int); - //printf("dstSize = %i\n", dstSize); -PERF_COUNT_SUB("pix setup") - - // opencl objects - cl_int clStatus; - KernelEnv kEnv; - SetKernelEnv( &kEnv ); - - // source buffer - cl_mem srcBuffer = clCreateBuffer( kEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, srcSize, (void *)srcData, &clStatus ); - CHECK_OPENCL( clStatus, 
"clCreateBuffer srcBuffer"); - - // destination buffer - cl_mem dstBuffer = clCreateBuffer( kEnv.mpkContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, dstSize, (void *)dstData, &clStatus ); - CHECK_OPENCL( clStatus, "clCreateBuffer dstBuffer"); - - // setup work group size parameters - int block_size = 256; - int numWorkGroups = ((h*w+block_size-1) / block_size ); - int numThreads = block_size*numWorkGroups; - size_t local_work_size[] = {static_cast(block_size)}; - size_t global_work_size[] = {static_cast(numThreads)}; - //printf("Enqueueing %i threads for %i output pixels\n", numThreads, w*h); - - /* compile kernel */ - kEnv.mpkKernel = clCreateKernel( kEnv.mpkProgram, "kernel_RGBToGray", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel kernel_RGBToGray"); - - - /* set kernel arguments */ - clStatus = clSetKernelArg( kEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&srcBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg srcBuffer"); - clStatus = clSetKernelArg( kEnv.mpkKernel, 1, sizeof(cl_mem), (void *)&dstBuffer ); - CHECK_OPENCL( clStatus, "clSetKernelArg dstBuffer"); - clStatus = clSetKernelArg( kEnv.mpkKernel, 2, sizeof(int), (void *)&srcWPL ); - CHECK_OPENCL( clStatus, "clSetKernelArg srcWPL" ); - clStatus = clSetKernelArg( kEnv.mpkKernel, 3, sizeof(int), (void *)&dstWPL ); - CHECK_OPENCL( clStatus, "clSetKernelArg dstWPL" ); - clStatus = clSetKernelArg( kEnv.mpkKernel, 4, sizeof(int), (void *)&h ); - CHECK_OPENCL( clStatus, "clSetKernelArg height" ); - clStatus = clSetKernelArg( kEnv.mpkKernel, 5, sizeof(int), (void *)&w ); - CHECK_OPENCL( clStatus, "clSetKernelArg width" ); - clStatus = clSetKernelArg( kEnv.mpkKernel, 6, sizeof(float), (void *)&rwt ); - CHECK_OPENCL( clStatus, "clSetKernelArg rwt" ); - clStatus = clSetKernelArg( kEnv.mpkKernel, 7, sizeof(float), (void *)&gwt ); - CHECK_OPENCL( clStatus, "clSetKernelArg gwt"); - clStatus = clSetKernelArg( kEnv.mpkKernel, 8, sizeof(float), (void *)&bwt ); - CHECK_OPENCL( clStatus, "clSetKernelArg bwt"); - - 
/* launch kernel & wait */ -PERF_COUNT_SUB("before") - clStatus = clEnqueueNDRangeKernel( - kEnv.mpkCmdQueue, - kEnv.mpkKernel, - 1, NULL, global_work_size, local_work_size, - 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_RGBToGray" ); - clFinish( kEnv.mpkCmdQueue ); -PERF_COUNT_SUB("kernel") - - /* map results back from gpu */ - void *ptr = clEnqueueMapBuffer(kEnv.mpkCmdQueue, dstBuffer, CL_TRUE, CL_MAP_READ, 0, dstSize, 0, NULL, NULL, &clStatus); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer dstBuffer"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, NULL, NULL); + if (rwt == 0.0 && gwt == 0.0 && bwt == 0.0) { + // magic numbers from leptonica + rwt = 0.3; + gwt = 0.5; + bwt = 0.2; + } + // normalize + float sum = rwt + gwt + bwt; + rwt /= sum; + gwt /= sum; + bwt /= sum; + + // source pix + int w, h; + pixGetDimensions(srcPix, &w, &h, NULL); + // printf("Image is %i x %i\n", w, h); + unsigned int *srcData = pixGetData(srcPix); + int srcWPL = pixGetWpl(srcPix); + int srcSize = srcWPL * h * sizeof(unsigned int); + + // destination pix + if ((dstPix = pixCreate(w, h, 8)) == NULL) return NULL; + pixCopyResolution(dstPix, srcPix); + unsigned int *dstData = pixGetData(dstPix); + int dstWPL = pixGetWpl(dstPix); + int dstWords = dstWPL * h; + int dstSize = dstWords * sizeof(unsigned int); + // printf("dstSize = %i\n", dstSize); + PERF_COUNT_SUB("pix setup") + + // opencl objects + cl_int clStatus; + KernelEnv kEnv; + SetKernelEnv(&kEnv); + + // source buffer + cl_mem srcBuffer = + clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + srcSize, (void *)srcData, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer srcBuffer"); + + // destination buffer + cl_mem dstBuffer = + clCreateBuffer(kEnv.mpkContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + dstSize, (void *)dstData, &clStatus); + CHECK_OPENCL(clStatus, "clCreateBuffer dstBuffer"); + + // setup work group size parameters + int block_size = 256; + int 
numWorkGroups = ((h * w + block_size - 1) / block_size); + int numThreads = block_size * numWorkGroups; + size_t local_work_size[] = {static_cast(block_size)}; + size_t global_work_size[] = {static_cast(numThreads)}; + // printf("Enqueueing %i threads for %i output pixels\n", numThreads, w*h); + + /* compile kernel */ + kEnv.mpkKernel = + clCreateKernel(kEnv.mpkProgram, "kernel_RGBToGray", &clStatus); + CHECK_OPENCL(clStatus, "clCreateKernel kernel_RGBToGray"); + + /* set kernel arguments */ + clStatus = + clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&srcBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg srcBuffer"); + clStatus = + clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), (void *)&dstBuffer); + CHECK_OPENCL(clStatus, "clSetKernelArg dstBuffer"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(int), (void *)&srcWPL); + CHECK_OPENCL(clStatus, "clSetKernelArg srcWPL"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(int), (void *)&dstWPL); + CHECK_OPENCL(clStatus, "clSetKernelArg dstWPL"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(int), (void *)&h); + CHECK_OPENCL(clStatus, "clSetKernelArg height"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 5, sizeof(int), (void *)&w); + CHECK_OPENCL(clStatus, "clSetKernelArg width"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 6, sizeof(float), (void *)&rwt); + CHECK_OPENCL(clStatus, "clSetKernelArg rwt"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 7, sizeof(float), (void *)&gwt); + CHECK_OPENCL(clStatus, "clSetKernelArg gwt"); + clStatus = clSetKernelArg(kEnv.mpkKernel, 8, sizeof(float), (void *)&bwt); + CHECK_OPENCL(clStatus, "clSetKernelArg bwt"); + + /* launch kernel & wait */ + PERF_COUNT_SUB("before") + clStatus = + clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, NULL, + global_work_size, local_work_size, 0, NULL, NULL); + CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_RGBToGray"); + clFinish(kEnv.mpkCmdQueue); + PERF_COUNT_SUB("kernel") + + /* map results back 
from gpu */ + void *ptr = + clEnqueueMapBuffer(kEnv.mpkCmdQueue, dstBuffer, CL_TRUE, CL_MAP_READ, 0, + dstSize, 0, NULL, NULL, &clStatus); + CHECK_OPENCL(clStatus, "clEnqueueMapBuffer dstBuffer"); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, NULL, NULL); #if 0 // validate: compute on cpu @@ -3529,13 +3372,12 @@ PERF_COUNT_SUB("kernel") //printf("\n"); } #endif - // release opencl objects - clReleaseMemObject(srcBuffer); - clReleaseMemObject(dstBuffer); - + // release opencl objects + clReleaseMemObject(srcBuffer); + clReleaseMemObject(dstBuffer); -PERF_COUNT_END - // success - return dstPix; + PERF_COUNT_END + // success + return dstPix; } #endif diff --git a/opencl/openclwrapper.h b/opencl/openclwrapper.h index f7e9ad7891..cfd612aa98 100644 --- a/opencl/openclwrapper.h +++ b/opencl/openclwrapper.h @@ -19,7 +19,8 @@ // including CL/cl.h doesn't occur until USE_OPENCL defined below // platform preprocessor commands -#if defined( WIN32 ) || defined( __WIN32__ ) || defined( _WIN32 ) || defined( __CYGWIN__ ) || defined( __MINGW32__ ) +#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || \ + defined(__CYGWIN__) || defined(__MINGW32__) #define ON_WINDOWS 1 #define ON_LINUX 0 #define ON_APPLE 0 @@ -89,21 +90,23 @@ time_sub_start = time_funct_start; \ time_sub_end = time_funct_start; -#define PERF_COUNT_END \ - QueryPerformanceCounter(&time_funct_end); \ - elapsed_time_sec = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); \ - printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); +#define PERF_COUNT_END \ + QueryPerformanceCounter(&time_funct_end); \ + elapsed_time_sec = (time_funct_end.QuadPart - time_funct_start.QuadPart) / \ + (double)(freq.QuadPart); \ + printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); #else #define PERF_COUNT_START(FUNCT_NAME) #define PERF_COUNT_END #endif #if PERF_COUNT_VERBOSE >= 3 -#define PERF_COUNT_SUB(SUB) \ - QueryPerformanceCounter(&time_sub_end); \ - 
elapsed_time_sec = (time_sub_end.QuadPart-time_sub_start.QuadPart)/(double)(freq.QuadPart); \ - printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ - time_sub_start = time_sub_end; +#define PERF_COUNT_SUB(SUB) \ + QueryPerformanceCounter(&time_sub_end); \ + elapsed_time_sec = (time_sub_end.QuadPart - time_sub_start.QuadPart) / \ + (double)(freq.QuadPart); \ + printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ + time_sub_start = time_sub_end; #else #define PERF_COUNT_SUB(SUB) #endif @@ -121,21 +124,25 @@ time_sub_start = time_funct_start; \ time_sub_end = time_funct_start; -#define PERF_COUNT_END \ - clock_gettime( CLOCK_MONOTONIC, &time_funct_end ); \ - elapsed_time_sec = (time_funct_end.tv_sec - time_funct_start.tv_sec)*1.0 + (time_funct_end.tv_nsec - time_funct_start.tv_nsec)/1000000000.0; \ - printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); +#define PERF_COUNT_END \ + clock_gettime(CLOCK_MONOTONIC, &time_funct_end); \ + elapsed_time_sec = \ + (time_funct_end.tv_sec - time_funct_start.tv_sec) * 1.0 + \ + (time_funct_end.tv_nsec - time_funct_start.tv_nsec) / 1000000000.0; \ + printf(PERF_COUNT_REPORT_STR, funct_name, "total", elapsed_time_sec); #else #define PERF_COUNT_START(FUNCT_NAME) #define PERF_COUNT_END #endif #if PERF_COUNT_VERBOSE >= 3 -#define PERF_COUNT_SUB(SUB) \ - clock_gettime( CLOCK_MONOTONIC, &time_sub_end ); \ - elapsed_time_sec = (time_sub_end.tv_sec - time_sub_start.tv_sec)*1.0 + (time_sub_end.tv_nsec - time_sub_start.tv_nsec)/1000000000.0; \ - printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ - time_sub_start = time_sub_end; +#define PERF_COUNT_SUB(SUB) \ + clock_gettime(CLOCK_MONOTONIC, &time_sub_end); \ + elapsed_time_sec = \ + (time_sub_end.tv_sec - time_sub_start.tv_sec) * 1.0 + \ + (time_sub_end.tv_nsec - time_sub_start.tv_nsec) / 1000000000.0; \ + printf(PERF_COUNT_REPORT_STR, funct_name, SUB, elapsed_time_sec); \ + time_sub_start = time_sub_end; #else #define 
PERF_COUNT_SUB(SUB) #endif @@ -262,12 +269,12 @@ class OpenclDevice // OpenCL implementation of Morphology (Hollow = Closed - Open) static PIX* pixHollowCL(PIX *pixd, PIX *pixs, l_int32 close_hsize, l_int32 close_vsize, l_int32 open_hsize, l_int32 open_vsize, bool reqDataCopy); - static void pixGetLinesCL(PIX *pixd, PIX *pixs, - PIX** pix_vline, PIX** pix_hline, - PIX** pixClosed, bool getpixClosed, - l_int32 close_hsize, l_int32 close_vsize, - l_int32 open_hsize, l_int32 open_vsize, - l_int32 line_hsize, l_int32 line_vsize); + static void pixGetLinesCL(PIX *pixd, PIX *pixs, PIX **pix_vline, + PIX **pix_hline, PIX **pixClosed, + bool getpixClosed, l_int32 close_hsize, + l_int32 close_vsize, l_int32 open_hsize, + l_int32 open_vsize, l_int32 line_hsize, + l_int32 line_vsize); //int InitOpenclAttr( OpenCLEnv * env ); //int ReleaseKernel( KernelEnv * env ); @@ -288,34 +295,24 @@ class OpenclDevice static void FreeOpenclDll(); #endif - inline static int AddKernelConfig( int kCount, const char *kName ); /* for binarization */ - static int HistogramRectOCL( - const unsigned char *imagedata, - int bytes_per_pixel, - int bytes_per_line, - int left, - int top, - int width, - int height, - int kHistogramSize, - int *histogramAllChannels); - - static int ThresholdRectToPixOCL( - const unsigned char* imagedata, - int bytes_per_pixel, - int bytes_per_line, - const int* thresholds, - const int* hi_values, - Pix** pix, - int rect_height, - int rect_width, - int rect_top, - int rect_left); - - static Pix * pixConvertRGBToGrayOCL( Pix *pix, float weightRed = 0.3, float weightGreen = 0.5, float weightBlue = 0.2 ); + static int HistogramRectOCL(const unsigned char *imagedata, + int bytes_per_pixel, int bytes_per_line, + int left, int top, int width, int height, + int kHistogramSize, int *histogramAllChannels); + + static int ThresholdRectToPixOCL(const unsigned char *imagedata, + int bytes_per_pixel, int bytes_per_line, + const int *thresholds, + const int *hi_values, Pix **pix, + int 
rect_height, int rect_width, + int rect_top, int rect_left); + + static Pix *pixConvertRGBToGrayOCL(Pix *pix, float weightRed = 0.3, + float weightGreen = 0.5, + float weightBlue = 0.2); static ds_device getDeviceSelection(); static ds_device selectedDevice; diff --git a/tessdata/configs/box.train.stderr b/tessdata/configs/box.train.stderr index 6fc51fdd5e..d44ff2b2c7 100644 --- a/tessdata/configs/box.train.stderr +++ b/tessdata/configs/box.train.stderr @@ -1,7 +1,7 @@ -file_type .bl -#tessedit_use_nn F -textord_fast_pitch_test T -tessedit_single_match 0 +file_type .bl +#tessedit_use_nn F +textord_fast_pitch_test T +tessedit_single_match 0 tessedit_zero_rejection T tessedit_minimal_rejection F tessedit_write_rep_codes F diff --git a/textord/blkocc.h b/textord/blkocc.h index 89462dc86f..8305c36cdb 100644 --- a/textord/blkocc.h +++ b/textord/blkocc.h @@ -72,12 +72,12 @@ ELISTIZEH (REGION_OCC) Adapted from the following procedure so that it can be used in the bands class in an include file... -BOOL8 range_in_band[ +BOOL8 range_in_band[ range within band? -inT16 band_max, -inT16 band_min, -inT16 range_max, -inT16 range_min] +inT16 band_max, +inT16 band_min, +inT16 range_max, +inT16 range_min] { if ( (range_min >= band_min) && (range_max < band_max) ) return TRUE; @@ -91,12 +91,12 @@ inT16 range_min] Adapted from the following procedure so that it can be used in the bands class in an include file... -BOOL8 range_overlaps_band[ +BOOL8 range_overlaps_band[ range crosses band? 
-inT16 band_max, -inT16 band_min, -inT16 range_max, -inT16 range_min] +inT16 band_max, +inT16 band_min, +inT16 range_max, +inT16 range_min] { if ( (range_max >= band_min) && (range_min < band_max) ) return TRUE; diff --git a/textord/drawedg.h b/textord/drawedg.h index 6bf062d4ee..ef5ed5e202 100644 --- a/textord/drawedg.h +++ b/textord/drawedg.h @@ -1,8 +1,9 @@ /********************************************************************** * File: drawedg.h (Formerly drawedge.h) - * Description: Collection of functions to draw things to do with edge detection. - * Author: Ray Smith - * Created: Thu Jun 06 13:29:20 BST 1991 + * Description: Collection of functions to draw things to do with edge + *detection. + * Author: Ray Smith + * Created: Thu Jun 06 13:29:20 BST 1991 * * (C) Copyright 1991, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/textord/fpchop.cpp b/textord/fpchop.cpp index 4c18338b8f..be2768cce9 100644 --- a/textord/fpchop.cpp +++ b/textord/fpchop.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: fpchop.cpp (Formerly fp_chop.c) * Description: Code to chop fixed pitch text into character cells. - * Author: Ray Smith - * Created: Thu Sep 16 11:14:15 BST 1993 + * Author: Ray Smith + * Created: Thu Sep 16 11:14:15 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -730,7 +730,6 @@ C_OUTLINE *join_chopped_fragments( //join pieces return NULL; } - /********************************************************************** * join_segments * diff --git a/textord/imagefind.cpp b/textord/imagefind.cpp index c119e69f95..4f27779b8e 100644 --- a/textord/imagefind.cpp +++ b/textord/imagefind.cpp @@ -77,7 +77,7 @@ Pix* ImageFind::FindImages(Pix* pix) { // Leptonica will print an error message and return NULL if we call // pixGenHalftoneMask(pixr, NULL, ...) 
with too small image, so we // want to bypass that. - if (pixGetWidth(pixr) < kMinImageFindSize || + if (pixGetWidth(pixr) < kMinImageFindSize || pixGetHeight(pixr) < kMinImageFindSize) { pixDestroy(&pixr); return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); diff --git a/textord/makerow.cpp b/textord/makerow.cpp index 1df4855b40..c8170e5d16 100644 --- a/textord/makerow.cpp +++ b/textord/makerow.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: makerow.cpp (Formerly makerows.c) * Description: Code to arrange blobs into rows of text. - * Author: Ray Smith - * Created: Mon Sep 21 14:34:48 BST 1992 + * Author: Ray Smith + * Created: Mon Sep 21 14:34:48 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -671,7 +671,7 @@ BOOL8 find_best_dropout_row( //find neighbours TO_ROW_IT *row_it, //current position BOOL8 testing_on //correct orientation ) { - inT32 next_index; //of neighbouring row + inT32 next_index; // of neighbouring row inT32 row_offset; //from current row inT32 abs_dist; //absolute distance inT8 row_inc; //increment to row_index @@ -1786,7 +1786,7 @@ static int CountOverlaps(const TBOX& box, int min_height, BLOBNBOX_IT blob_it(blobs); for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { BLOBNBOX* blob = blob_it.data(); - TBOX blob_box = blob->bounding_box(); + const TBOX &blob_box = blob->bounding_box(); if (blob_box.height() >= min_height && box.major_overlap(blob_box)) { ++overlaps; } diff --git a/textord/oldbasel.cpp b/textord/oldbasel.cpp index c73fe8d57b..99e55fdbb6 100644 --- a/textord/oldbasel.cpp +++ b/textord/oldbasel.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: oldbasel.cpp (Formerly oldbl.c) * Description: A re-implementation of the old baseline algorithm. 
- * Author: Ray Smith - * Created: Wed Oct 6 09:41:48 BST 1993 + * Author: Ray Smith + * Created: Wed Oct 6 09:41:48 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -122,7 +122,7 @@ void Textord::correlate_lines(TO_BLOCK *block, float gradient) { TO_ROW **rows; //array of ptrs int rowcount; /*no of rows to do */ int rowindex; /*no of row */ - //iterator + // iterator TO_ROW_IT row_it = block->get_rows (); rowcount = row_it.length (); @@ -1018,61 +1018,6 @@ int *partcount /*no of partitions */ return bestpart; } - -///*merge_partitions(partids,partcount,blobcount,bestpart) discards funny looking -//partitions and gives all the rest partid 0*/ -// -//merge_partitions(partids,partcount,blobcount,bestpart) -//register char *partids; /*partition numbers*/ -//int partcount; /*no of partitions*/ -//int blobcount; /*no of blobs*/ -//int bestpart; /*best partition*/ -//{ -// int blobindex; /*no along text line*/ -// int runlength; /*run of same partition*/ -// int bestrun; /*biggest runlength*/ -// -// bestrun=0; /*no runs yet*/ -// runlength=1; -// for (blobindex=1;blobindexbestrun) -// bestrun=runlength; /*find biggest run*/ -// runlength=1; /*new run*/ -// } -// else -// { runlength++; -// } -// } -// if (runlength>bestrun) -// bestrun=runlength; -// -// for (blobindex=0;blobindex=blobcount -// || partids[blobindex]!=partids[blobindex+1]) -// /*loner*/ -// && (bestrun>2 || partids[blobindex]!=bestpart)) -// { partids[blobindex]=partcount; /*discard loner*/ -// } -// else if (blobindex+1=blobcount -// || partids[blobindex]!=partids[blobindex+2]) -// && (bestrun>3 || partids[blobindex]!=bestpart)) -// { partids[blobindex]=partcount; /*discard both*/ -// partids[blobindex+1]=partcount; -// } -// } -// } -// for (blobindex=0;blobindex= MINASCRISE && partsizes[partition] > poscount) { @@ -1459,8 +1402,8 @@ int blobcount, /*blobs in blobcoords */ QSPLINE * baseline, /*established */ float jumplimit 
/*min ascender height */ ) { - int blobindex; /*current blob */ - /*height statistics */ + int blobindex; /*current blob */ + /*height statistics */ STATS heightstat (0, MAXHEIGHT); int height; /*height of blob */ int xcentre; /*centre of blob */ diff --git a/textord/pithsync.h b/textord/pithsync.h index f9ba479e2f..386426be72 100644 --- a/textord/pithsync.h +++ b/textord/pithsync.h @@ -1,8 +1,8 @@ /********************************************************************** * File: pithsync.h (Formerly pitsync2.h) * Description: Code to find the optimum fixed pitch segmentation of some blobs. - * Author: Ray Smith - * Created: Thu Nov 19 11:48:05 GMT 1992 + * Author: Ray Smith + * Created: Thu Nov 19 11:48:05 GMT 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -66,7 +66,7 @@ class FPCUTPT inT16 pitch, //proposed pitch inT16 pitch_error); //allowed tolerance - inT32 position() { //access func + inT32 position() { // access func return xpos; } double cost_function() { diff --git a/textord/pitsync1.h b/textord/pitsync1.h index c2fb9bec65..5374b003dd 100644 --- a/textord/pitsync1.h +++ b/textord/pitsync1.h @@ -1,8 +1,8 @@ /********************************************************************** * File: pitsync1.h (Formerly pitsync.h) * Description: Code to find the optimum fixed pitch segmentation of some blobs. - * Author: Ray Smith - * Created: Thu Nov 19 11:48:05 GMT 1992 + * Author: Ray Smith + * Created: Thu Nov 19 11:48:05 GMT 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -46,7 +46,7 @@ class FPSEGPT:public ELIST_LINK FPSEGPT_LIST *prev_list); //previous segment FPSEGPT(FPCUTPT *cutpt); //build from new type - inT32 position() { //access func + inT32 position() { // access func return xpos; } double cost_function() { diff --git a/textord/tabvector.cpp b/textord/tabvector.cpp index c8d508f0df..fcc64a7721 100644 --- a/textord/tabvector.cpp +++ b/textord/tabvector.cpp @@ -523,12 +523,12 @@ const char* kAlignmentNames[] = { // Print basic information about this tab vector. void TabVector::Print(const char* prefix) { - tprintf("%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d," - " partners=%d\n", - prefix, kAlignmentNames[alignment_], - startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(), - mean_width_, percent_score_, sort_key_, - boxes_.length(), partners_.length()); + tprintf( + "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d," + " partners=%d\n", + prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(), + endpt_.x(), endpt_.y(), mean_width_, percent_score_, sort_key_, + boxes_.length(), partners_.length()); } // Print basic information about this tab vector and every box in it. diff --git a/textord/textlineprojection.cpp b/textord/textlineprojection.cpp index 6018e5fdaa..2651a19b33 100644 --- a/textord/textlineprojection.cpp +++ b/textord/textlineprojection.cpp @@ -760,7 +760,7 @@ void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const { pt->y = ClipToRange(pt->y, 0, pixGetHeight(pix_) - 1); } #ifdef _MSC_VER -#pragma optimize( "", on ) +#pragma optimize("", on) #endif // _MSC_VER // Transform tesseract image coordinates to coordinates used in the projection. 
diff --git a/textord/textord.cpp b/textord/textord.cpp index 1f7e8a8869..94ef49c783 100644 --- a/textord/textord.cpp +++ b/textord/textord.cpp @@ -33,7 +33,8 @@ namespace tesseract { Textord::Textord(CCStruct* ccstruct) - : ccstruct_(ccstruct), use_cjk_fp_model_(false), + : ccstruct_(ccstruct), + use_cjk_fp_model_(false), // makerow.cpp /////////////////////////////////////////// BOOL_MEMBER(textord_single_height_mode, false, "Script has no xheight, so use a single mode", @@ -46,24 +47,20 @@ Textord::Textord(CCStruct* ccstruct) "old_to_method.", ccstruct_->params()), BOOL_MEMBER(tosp_only_use_prop_rows, true, - "Block stats to use fixed pitch rows?", - ccstruct_->params()), + "Block stats to use fixed pitch rows?", ccstruct_->params()), BOOL_MEMBER(tosp_force_wordbreak_on_punct, false, "Force word breaks on punct to break long lines in non-space " "delimited langs", ccstruct_->params()), - BOOL_MEMBER(tosp_use_pre_chopping, false, - "Space stats use prechopping?", + BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?", ccstruct_->params()), BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", ccstruct_->params()), - BOOL_MEMBER(tosp_block_use_cert_spaces, true, - "Only stat OBVIOUS spaces", + BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params()), BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", ccstruct_->params()), - BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, - "Only stat OBVIOUS spaces", + BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces", ccstruct_->params()), BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", ccstruct_->params()), @@ -78,30 +75,24 @@ Textord::Textord(CCStruct* ccstruct) "Don't restrict kn->sp fuzzy limit to tables", ccstruct_->params()), BOOL_MEMBER(tosp_stats_use_xht_gaps, true, - "Use within xht gap for wd breaks", - ccstruct_->params()), + "Use within xht gap for wd breaks", 
ccstruct_->params()), BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", ccstruct_->params()), BOOL_MEMBER(tosp_only_use_xht_gaps, false, - "Only use within xht gap for wd breaks", - ccstruct_->params()), + "Only use within xht gap for wd breaks", ccstruct_->params()), BOOL_MEMBER(tosp_rule_9_test_punct, false, - "Don't chng kn to space next to punct", - ccstruct_->params()), + "Don't chng kn to space next to punct", ccstruct_->params()), BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", ccstruct_->params()), BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", ccstruct_->params()), BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", ccstruct_->params()), - INT_MEMBER(tosp_debug_level, 0, "Debug data", - ccstruct_->params()), + INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()), INT_MEMBER(tosp_enough_space_samples_for_median, 3, - "or should we use mean", - ccstruct_->params()), + "or should we use mean", ccstruct_->params()), INT_MEMBER(tosp_redo_kern_limit, 10, - "No.samples reqd to reestimate for row", - ccstruct_->params()), + "No.samples reqd to reestimate for row", ccstruct_->params()), INT_MEMBER(tosp_few_samples, 40, "No.gaps reqd with 1 large gap to treat as a table", ccstruct_->params()), @@ -114,30 +105,24 @@ Textord::Textord(CCStruct* ccstruct) "Factor for defining space threshold in terms of space and " "kern sizes", ccstruct_->params()), - double_MEMBER(tosp_threshold_bias1, 0, - "how far between kern and space?", + double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?", ccstruct_->params()), - double_MEMBER(tosp_threshold_bias2, 0, - "how far between kern and space?", + double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?", ccstruct_->params()), double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", ccstruct_->params()), double_MEMBER(tosp_narrow_aspect_ratio, 0.48, - "narrow if w/h less than this", - ccstruct_->params()), + 
"narrow if w/h less than this", ccstruct_->params()), double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", ccstruct_->params()), double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", ccstruct_->params()), double_MEMBER(tosp_fuzzy_space_factor, 0.6, - "Fract of xheight for fuzz sp", - ccstruct_->params()), + "Fract of xheight for fuzz sp", ccstruct_->params()), double_MEMBER(tosp_fuzzy_space_factor1, 0.5, - "Fract of xheight for fuzz sp", - ccstruct_->params()), + "Fract of xheight for fuzz sp", ccstruct_->params()), double_MEMBER(tosp_fuzzy_space_factor2, 0.72, - "Fract of xheight for fuzz sp", - ccstruct_->params()), + "Fract of xheight for fuzz sp", ccstruct_->params()), double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", ccstruct_->params()), double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", @@ -156,14 +141,11 @@ Textord::Textord(CCStruct* ccstruct) "Fract of kerns reqd for isolated row stats", ccstruct_->params()), double_MEMBER(tosp_table_kn_sp_ratio, 2.25, - "Min difference of kn & sp in table", - ccstruct_->params()), + "Min difference of kn & sp in table", ccstruct_->params()), double_MEMBER(tosp_table_xht_sp_ratio, 0.33, - "Expect spaces bigger than this", - ccstruct_->params()), + "Expect spaces bigger than this", ccstruct_->params()), double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, - "Fuzzy if less than this", - ccstruct_->params()), + "Fuzzy if less than this", ccstruct_->params()), double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", ccstruct_->params()), double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", @@ -172,20 +154,16 @@ Textord::Textord(CCStruct* ccstruct) "Don't trust spaces less than this time kn", ccstruct_->params()), double_MEMBER(tosp_init_guess_kn_mult, 2.2, - "Thresh guess - mult kn by this", - ccstruct_->params()), + "Thresh guess - mult kn by this", ccstruct_->params()), double_MEMBER(tosp_init_guess_xht_mult, 0.28, - "Thresh guess - mult xht 
by this", - ccstruct_->params()), + "Thresh guess - mult xht by this", ccstruct_->params()), double_MEMBER(tosp_max_sane_kn_thresh, 5.0, - "Multiplier on kn to limit thresh", - ccstruct_->params()), + "Multiplier on kn to limit thresh", ccstruct_->params()), double_MEMBER(tosp_flip_caution, 0.0, "Don't autoflip kn to sp when large separation", ccstruct_->params()), double_MEMBER(tosp_large_kerning, 0.19, - "Limit use of xht gap with large kns", - ccstruct_->params()), + "Limit use of xht gap with large kns", ccstruct_->params()), double_MEMBER(tosp_dont_fool_with_small_kerns, -1, "Limit use of xht gap with odd small kns", ccstruct_->params()), @@ -193,11 +171,9 @@ Textord::Textord(CCStruct* ccstruct) "Don't reduce box if the top left is non blank", ccstruct_->params()), double_MEMBER(tosp_silly_kn_sp_gap, 0.2, - "Don't let sp minus kn get too small", - ccstruct_->params()), + "Don't let sp minus kn get too small", ccstruct_->params()), double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, - "How wide fuzzies need context", - ccstruct_->params()), + "How wide fuzzies need context", ccstruct_->params()), // tordmain.cpp /////////////////////////////////////////// BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", ccstruct_->params()), @@ -206,34 +182,27 @@ Textord::Textord(CCStruct* ccstruct) BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", ccstruct_->params()), INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", - ccstruct_->params()), + ccstruct_->params()), INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", - ccstruct_->params()), + ccstruct_->params()), double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs", ccstruct_->params()), double_MEMBER(textord_noise_area_ratio, 0.7, - "Fraction of bounding box for noise", - ccstruct_->params()), + "Fraction of bounding box for noise", ccstruct_->params()), double_MEMBER(textord_blob_size_smallile, 20, - "Percentile for small blobs", - 
ccstruct_->params()), + "Percentile for small blobs", ccstruct_->params()), double_MEMBER(textord_initialx_ile, 0.75, - "Ile of sizes for xheight guess", - ccstruct_->params()), + "Ile of sizes for xheight guess", ccstruct_->params()), double_MEMBER(textord_initialasc_ile, 0.90, - "Ile of sizes for xheight guess", - ccstruct_->params()), - INT_MEMBER(textord_noise_sizefraction, 10, - "Fraction of size for maxima", + "Ile of sizes for xheight guess", ccstruct_->params()), + INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima", ccstruct_->params()), double_MEMBER(textord_noise_sizelimit, 0.5, - "Fraction of x for big t count", - ccstruct_->params()), + "Fraction of x for big t count", ccstruct_->params()), INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", ccstruct_->params()), double_MEMBER(textord_noise_normratio, 2.0, - "Dot to norm ratio for deletion", - ccstruct_->params()), + "Dot to norm ratio for deletion", ccstruct_->params()), BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", ccstruct_->params()), BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", @@ -242,24 +211,20 @@ Textord::Textord(CCStruct* ccstruct) "xh fract height error for norm blobs", ccstruct_->params()), double_MEMBER(textord_noise_sxfract, 0.4, - "xh fract width error for norm blobs", - ccstruct_->params()), - double_MEMBER(textord_noise_hfract, 1.0/64, + "xh fract width error for norm blobs", ccstruct_->params()), + double_MEMBER(textord_noise_hfract, 1.0 / 64, "Height fraction to discard outlines as speckle noise", ccstruct_->params()), INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", ccstruct_->params()), double_MEMBER(textord_noise_rowratio, 6.0, - "Dot to norm ratio for deletion", - ccstruct_->params()), + "Dot to norm ratio for deletion", ccstruct_->params()), BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", ccstruct_->params()), double_MEMBER(textord_blshift_maxshift, 
0.00, "Max baseline shift", ccstruct_->params()), double_MEMBER(textord_blshift_xfraction, 9.99, - "Min size of baseline shift", - ccstruct_->params()) { -} + "Min size of baseline shift", ccstruct_->params()) {} Textord::~Textord() { } @@ -324,10 +289,9 @@ void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew, BaselineDetect baseline_detector(textord_baseline_debug, reskew, to_blocks); baseline_detector.ComputeStraightBaselines(use_box_bottoms); - baseline_detector.ComputeBaselineSplinesAndXheights(page_tr_, true, - textord_heavy_nr, - textord_show_final_rows, - this); + baseline_detector.ComputeBaselineSplinesAndXheights( + page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, + textord_show_final_rows, this); // Now make the words in the lines. if (PSM_WORD_FIND_ENABLED(pageseg_mode)) { // SINGLE_LINE uses the old word maker on the single line. diff --git a/textord/topitch.cpp b/textord/topitch.cpp index ae9999f7db..3dda815777 100644 --- a/textord/topitch.cpp +++ b/textord/topitch.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: topitch.cpp (Formerly to_pitch.c) * Description: Code to determine fixed pitchness and the pitch if fixed. - * Author: Ray Smith - * Created: Tue Aug 24 16:57:29 BST 1993 + * Author: Ray Smith + * Created: Tue Aug 24 16:57:29 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. 
** Licensed under the Apache License, Version 2.0 (the "License"); @@ -1084,7 +1084,7 @@ BOOL8 count_pitch_stats( //find lines return FALSE; prev_valid = FALSE; prev_centre = 0; - prev_right = 0; //stop compiler warning + prev_right = 0; // stop compiler warning joined_box = blob_it.data ()->bounding_box (); do { blob_it.forward (); diff --git a/textord/tospace.cpp b/textord/tospace.cpp index 025634e1af..8a290772af 100644 --- a/textord/tospace.cpp +++ b/textord/tospace.cpp @@ -428,9 +428,8 @@ void Textord::row_spacing_stats( if (suspected_table && (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) { if (tosp_debug_level > 5) - tprintf ("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", - block_idx, row_idx, - row->kern_size, row->space_threshold, row->space_size); + tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx, + row_idx, row->kern_size, row->space_threshold, row->space_size); row->space_threshold = (inT32) (tosp_table_kn_sp_ratio * row->kern_size); row->space_size = MAX (row->space_threshold + 1, row->xheight); @@ -450,10 +449,9 @@ void Textord::row_spacing_stats( MAX (tosp_min_sane_kn_sp * MAX (row->kern_size, 2.5), row->xheight / 2); if (tosp_debug_level > 5) - tprintf - ("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n", - block_idx, row_idx, row->kern_size, row->space_threshold, - row->space_size, sane_space); + tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n", + block_idx, row_idx, row->kern_size, row->space_threshold, + row->space_size, sane_space); row->space_size = sane_space; row->space_threshold = inT32 (floor ((row->space_size + row->kern_size) / @@ -507,7 +505,7 @@ void Textord::row_spacing_stats( MIN (inT32 (ceil (tosp_fuzzy_space_factor * row->xheight)), inT32 (row->space_size)); if (row->min_space <= row->space_threshold) - //Don't be silly + // Don't be silly row->min_space = row->space_threshold + 1; /* Lets try to guess the max certain kern gap by looking at the cluster of 
@@ -568,7 +566,7 @@ void Textord::row_spacing_stats( row->kern_size)); } if (row->max_nonspace > row->space_threshold) { - //Don't be silly + // Don't be silly row->max_nonspace = row->space_threshold; } @@ -709,8 +707,8 @@ BOOL8 Textord::isolated_row_stats(TO_ROW *row, ((small_gaps_count / (float) total) < tosp_enough_small_gaps) || (total - small_gaps_count < 1)) { if (tosp_debug_level > 5) - tprintf ("B:%d R:%d -- Can't do isolated row stats.\n", - block_idx, row_idx); + tprintf("B:%d R:%d -- Can't do isolated row stats.\n", block_idx, + row_idx); return FALSE; } blob_it.set_to_list (row->blob_list ()); @@ -1139,10 +1137,10 @@ ROW *Textord::make_prop_words( else blanks = 0; if (tosp_debug_level > 5) - tprintf - ("Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n", - word->bounding_box ().left (), word->bounding_box ().bottom (), - repetition_spacing, current_gap, blanks); + tprintf( + "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n", + word->bounding_box().left(), word->bounding_box().bottom(), + repetition_spacing, current_gap, blanks); word->set_blanks (blanks); //NO uncertainty word->set_flag (W_FUZZY_SP, FALSE); @@ -1695,10 +1693,9 @@ void Textord::mark_gap( blob.bottom () + blob.height () / 2.0f); } if (tosp_debug_level > 5) - tprintf (" (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n", - blob.left () - current_gap / 2, blob.bottom (), rule, - prev_gap, prev_blob_width, current_gap, - next_blob_width, next_gap); + tprintf(" (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n", + blob.left() - current_gap / 2, blob.bottom(), rule, prev_gap, + prev_blob_width, current_gap, next_blob_width, next_gap); } #endif @@ -1736,8 +1733,7 @@ BOOL8 Textord::ignore_big_gap(TO_ROW *row, inT16 right) { inT16 gap = right - left + 1; - if (tosp_ignore_big_gaps > 999) - return FALSE; //Don't ignore + if (tosp_ignore_big_gaps > 999) return FALSE; // Don't ignore if (tosp_ignore_big_gaps > 0) return (gap > tosp_ignore_big_gaps * row->xheight); if (gap > 
tosp_ignore_very_big_gaps * row->xheight) @@ -1759,7 +1755,6 @@ BOOL8 Textord::ignore_big_gap(TO_ROW *row, return FALSE; } - /********************************************************************** * reduced_box_next * diff --git a/textord/tovars.cpp b/textord/tovars.cpp index 71114358ac..6b1b833248 100644 --- a/textord/tovars.cpp +++ b/textord/tovars.cpp @@ -1,8 +1,8 @@ /********************************************************************** * File: tovars.cpp (Formerly to_vars.c) * Description: Variables used by textord. - * Author: Ray Smith - * Created: Tue Aug 24 16:55:02 BST 1993 + * Author: Ray Smith + * Created: Tue Aug 24 16:55:02 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -49,8 +49,8 @@ EXTERN double_VAR (textord_words_default_minspace, 0.6, EXTERN double_VAR (textord_words_min_minspace, 0.3, "Fraction of xheight"); EXTERN double_VAR (textord_words_default_nonspace, 0.2, "Fraction of xheight"); -EXTERN double_VAR (textord_words_initial_lower, 0.25, -"Max initial cluster size"); +EXTERN double_VAR(textord_words_initial_lower, 0.25, + "Max initial cluster size"); EXTERN double_VAR (textord_words_initial_upper, 0.15, "Min initial cluster spacing"); EXTERN double_VAR (textord_words_minlarge, 0.75, @@ -67,7 +67,7 @@ EXTERN double_VAR (textord_pitch_rowsimilarity, 0.08, "Fraction of xheight for sameness"); EXTERN BOOL_VAR (textord_pitch_scalebigwords, FALSE, "Scale scores on big words"); -EXTERN double_VAR (words_initial_lower, 0.5, "Max initial cluster size"); +EXTERN double_VAR(words_initial_lower, 0.5, "Max initial cluster size"); EXTERN double_VAR (words_initial_upper, 0.15, "Min initial cluster spacing"); EXTERN double_VAR (words_default_prop_nonspace, 0.25, "Fraction of xheight"); EXTERN double_VAR (words_default_fixed_space, 0.75, "Fraction of xheight"); diff --git a/textord/tovars.h b/textord/tovars.h index 99edae1d2c..46315bb96b 100644 --- a/textord/tovars.h +++ 
b/textord/tovars.h @@ -1,8 +1,8 @@ /********************************************************************** * File: tovars.h (Formerly to_vars.h) * Description: Variables used by textord. - * Author: Ray Smith - * Created: Tue Aug 24 16:55:02 BST 1993 + * Author: Ray Smith + * Created: Tue Aug 24 16:55:02 BST 1993 * * (C) Copyright 1993, Hewlett-Packard Ltd. ** Licensed under the Apache License, Version 2.0 (the "License"); @@ -51,8 +51,8 @@ extern double_VAR_H (textord_words_default_minspace, 0.6, extern double_VAR_H (textord_words_min_minspace, 0.3, "Fraction of xheight"); extern double_VAR_H (textord_words_default_nonspace, 0.2, "Fraction of xheight"); -extern double_VAR_H (textord_words_initial_lower, 0.25, -"Max initial cluster size"); +extern double_VAR_H(textord_words_initial_lower, 0.25, + "Max initial cluster size"); extern double_VAR_H (textord_words_initial_upper, 0.15, "Min initial cluster spacing"); extern double_VAR_H (textord_words_minlarge, 0.75, @@ -69,7 +69,7 @@ extern double_VAR_H (textord_pitch_rowsimilarity, 0.08, "Fraction of xheight for sameness"); extern BOOL_VAR_H (textord_pitch_scalebigwords, FALSE, "Scale scores on big words"); -extern double_VAR_H (words_initial_lower, 0.5, "Max initial cluster size"); +extern double_VAR_H(words_initial_lower, 0.5, "Max initial cluster size"); extern double_VAR_H (words_initial_upper, 0.15, "Min initial cluster spacing"); extern double_VAR_H (words_default_prop_nonspace, 0.25, diff --git a/training/classifier_tester.cpp b/training/classifier_tester.cpp index 48f3781ebb..ed7e50cd2f 100644 --- a/training/classifier_tester.cpp +++ b/training/classifier_tester.cpp @@ -48,9 +48,9 @@ enum ClassifierName { const char* names[] = {"pruner", "full", #ifndef NO_CUBE_BUILD - "cube", "cubetess", + "cube", "cubetess", #endif // NO_CUBE_BUILD - NULL }; + NULL}; static tesseract::ShapeClassifier* InitializeClassifier( const char* classifer_name, const UNICHARSET& unicharset, @@ -80,7 +80,7 @@ static 
tesseract::ShapeClassifier* InitializeClassifier( tesseract::Classify* classify = NULL; if ( #ifndef NO_CUBE_BUILD - classifier == CN_CUBE || classifier == CN_CUBETESS || + classifier == CN_CUBE || classifier == CN_CUBETESS || #endif // NO_CUBE_BUILD classifier == CN_PRUNER || classifier == CN_FULL) { #ifndef NO_CUBE_BUILD diff --git a/training/cntraining.cpp b/training/cntraining.cpp index ab19ddb93d..916a758576 100644 --- a/training/cntraining.cpp +++ b/training/cntraining.cpp @@ -20,7 +20,6 @@ ** limitations under the License. ******************************************************************************/ - /*---------------------------------------------------------------------------- Include Files and Type Defines ----------------------------------------------------------------------------*/ @@ -53,10 +52,8 @@ int main ( Private Function Prototypes ----------------------------------------------------------------------------*/ -void WriteNormProtos ( - const char *Directory, - LIST LabeledProtoList, - CLUSTERER *Clusterer); +void WriteNormProtos (const char *Directory, LIST LabeledProtoList, + CLUSTERER *Clusterer); /* PARAMDESC *ConvertToPARAMDESC( @@ -81,7 +78,6 @@ CLUSTERCONFIG CNConfig = elliptical, 0.025, 0.05, 0.8, 1e-3, 0 }; - /*---------------------------------------------------------------------------- Public Code ----------------------------------------------------------------------------*/ @@ -134,8 +130,7 @@ CLUSTERCONFIG CNConfig = * @note Exceptions: none * @note History: Fri Aug 18 08:56:17 1989, DSJ, Created. */ -int main(int argc, char* argv[]) -{ +int main(int argc, char *argv[]) { // Set the global Config parameters before parsing the command line. Config = CNConfig; @@ -221,10 +216,8 @@ int main(int argc, char* argv[]) * @note Exceptions: none * @note History: Fri Aug 18 16:17:06 1989, DSJ, Created. 
*/ -void WriteNormProtos ( - const char *Directory, - LIST LabeledProtoList, - CLUSTERER *Clusterer) +void WriteNormProtos(const char *Directory, LIST LabeledProtoList, + CLUSTERER *Clusterer) { FILE *File; STRING Filename; @@ -240,8 +233,8 @@ void WriteNormProtos ( Filename += "normproto"; printf ("\nWriting %s ...", Filename.string()); File = Efopen (Filename.string(), "wb"); - fprintf(File,"%0d\n",Clusterer->SampleSize); - WriteParamDesc(File,Clusterer->SampleSize,Clusterer->ParamDesc); + fprintf(File, "%0d\n", Clusterer->SampleSize); + WriteParamDesc(File, Clusterer->SampleSize,Clusterer->ParamDesc); iterate(LabeledProtoList) { LabeledProto = (LABELEDLIST) first_node (LabeledProtoList); diff --git a/training/commandlineflags.cpp b/training/commandlineflags.cpp index d8cb371828..56dcb84221 100644 --- a/training/commandlineflags.cpp +++ b/training/commandlineflags.cpp @@ -115,36 +115,36 @@ void PrintCommandLineFlags() { if (!strncmp(GlobalParams()->int_params[i]->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) { printf(" --%s %s (type:int default:%d)\n", - GlobalParams()->int_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->int_params[i]->info_str(), - inT32(*(GlobalParams()->int_params[i]))); + GlobalParams()->int_params[i]->name_str() + kFlagNamePrefixLen, + GlobalParams()->int_params[i]->info_str(), + inT32(*(GlobalParams()->int_params[i]))); } } for (int i = 0; i < GlobalParams()->double_params.size(); ++i) { if (!strncmp(GlobalParams()->double_params[i]->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) { printf(" --%s %s (type:double default:%g)\n", - GlobalParams()->double_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->double_params[i]->info_str(), - static_cast(*(GlobalParams()->double_params[i]))); + GlobalParams()->double_params[i]->name_str() + kFlagNamePrefixLen, + GlobalParams()->double_params[i]->info_str(), + static_cast(*(GlobalParams()->double_params[i]))); } } for (int i = 0; i < GlobalParams()->bool_params.size(); 
++i) { if (!strncmp(GlobalParams()->bool_params[i]->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) { printf(" --%s %s (type:bool default:%s)\n", - GlobalParams()->bool_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->bool_params[i]->info_str(), - (BOOL8(*(GlobalParams()->bool_params[i])) ? "true" : "false")); + GlobalParams()->bool_params[i]->name_str() + kFlagNamePrefixLen, + GlobalParams()->bool_params[i]->info_str(), + (BOOL8(*(GlobalParams()->bool_params[i])) ? "true" : "false")); } } for (int i = 0; i < GlobalParams()->string_params.size(); ++i) { if (!strncmp(GlobalParams()->string_params[i]->name_str(), kFlagNamePrefix, kFlagNamePrefixLen)) { printf(" --%s %s (type:string default:%s)\n", - GlobalParams()->string_params[i]->name_str() + kFlagNamePrefixLen, - GlobalParams()->string_params[i]->info_str(), - GlobalParams()->string_params[i]->string()); + GlobalParams()->string_params[i]->name_str() + kFlagNamePrefixLen, + GlobalParams()->string_params[i]->info_str(), + GlobalParams()->string_params[i]->string()); } } } diff --git a/training/commontraining.cpp b/training/commontraining.cpp index df2521f513..b7243e6f3f 100644 --- a/training/commontraining.cpp +++ b/training/commontraining.cpp @@ -39,8 +39,8 @@ #include using tesseract::CCUtil; -using tesseract::FontInfo; using tesseract::IntFeatureSpace; +using tesseract::FontInfo; using tesseract::ParamUtils; using tesseract::ShapeTable; @@ -312,9 +312,7 @@ const char *GetNextFilename(int argc, const char* const * argv) { return argv[tessoptind++]; else return NULL; -} /* GetNextFilename */ - - +} /* GetNextFilename */ /*---------------------------------------------------------------------------*/ /** @@ -328,11 +326,8 @@ const char *GetNextFilename(int argc, const char* const * argv) { * @note Exceptions: none * @note History: Fri Aug 18 15:57:41 1989, DSJ, Created. 
*/ -LABELEDLIST FindList ( - LIST List, - char *Label) -{ - LABELEDLIST LabeledList; +LABELEDLIST FindList(LIST List, char* Label) { + LABELEDLIST LabeledList; iterate (List) { @@ -342,7 +337,7 @@ LABELEDLIST FindList ( } return (NULL); -} /* FindList */ +} /* FindList */ /*---------------------------------------------------------------------------*/ /** @@ -354,10 +349,8 @@ LABELEDLIST FindList ( * @note Exceptions: none * @note History: Fri Aug 18 16:08:46 1989, DSJ, Created. */ -LABELEDLIST NewLabeledList ( - const char *Label) -{ - LABELEDLIST LabeledList; +LABELEDLIST NewLabeledList(const char* Label) { + LABELEDLIST LabeledList; LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE)); LabeledList->Label = (char*)Emalloc (strlen (Label)+1); @@ -367,7 +360,7 @@ LABELEDLIST NewLabeledList ( LabeledList->font_sample_count = 0; return (LabeledList); -} /* NewLabeledList */ +} /* NewLabeledList */ /*---------------------------------------------------------------------------*/ // TODO(rays) This is now used only by cntraining. Convert cntraining to use @@ -386,7 +379,7 @@ LABELEDLIST NewLabeledList ( * @return none * @note Globals: none * @note Exceptions: none - * @note History: + * @note History: * - Fri Aug 18 13:11:39 1989, DSJ, Created. * - Tue May 17 1998 simplifications to structure, illiminated * font, and feature specification levels of structure. 
@@ -460,11 +453,10 @@ void FreeTrainingSamples(LIST CharList) { FEATURE_SET FeatureSet; LIST FeatureList; - - iterate(CharList) { /* iterate through all of the fonts */ + iterate(CharList) { /* iterate through all of the fonts */ char_sample = (LABELEDLIST) first_node(CharList); FeatureList = char_sample->List; - iterate(FeatureList) { /* iterate through all of the classes */ + iterate(FeatureList) { /* iterate through all of the classes */ FeatureSet = (FEATURE_SET) first_node(FeatureList); FreeFeatureSet(FeatureSet); } @@ -535,12 +527,12 @@ CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, if ( Sample != NULL ) free( Sample ); return( Clusterer ); -} /* SetUpForClustering */ +} /* SetUpForClustering */ /*------------------------------------------------------------------------*/ void MergeInsignificantProtos(LIST ProtoList, const char* label, - CLUSTERER *Clusterer, CLUSTERCONFIG *Config) { - PROTOTYPE *Prototype; + CLUSTERER* Clusterer, CLUSTERCONFIG* Config) { + PROTOTYPE* Prototype; bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0; LIST pProtoList = ProtoList; @@ -600,7 +592,7 @@ void MergeInsignificantProtos(LIST ProtoList, const char* label, Prototype->Significant = true; } } -} /* MergeInsignificantProtos */ +} /* MergeInsignificantProtos */ /*-----------------------------------------------------------------------------*/ void CleanUpUnusedData( @@ -695,14 +687,11 @@ LIST RemoveInsignificantProtos( } FreeProtoList(&ProtoList); return (NewProtoList); -} /* RemoveInsignificantProtos */ +} /* RemoveInsignificantProtos */ /*----------------------------------------------------------------------------*/ -MERGE_CLASS FindClass ( - LIST List, - const char *Label) -{ - MERGE_CLASS MergeClass; +MERGE_CLASS FindClass(LIST List, const char* Label) { + MERGE_CLASS MergeClass; iterate (List) { @@ -712,13 +701,11 @@ MERGE_CLASS FindClass ( } return (NULL); -} /* FindClass */ +} /* FindClass */ 
/*---------------------------------------------------------------------------*/ -MERGE_CLASS NewLabeledClass ( - const char *Label) -{ - MERGE_CLASS MergeClass; +MERGE_CLASS NewLabeledClass(const char* Label) { + MERGE_CLASS MergeClass; MergeClass = new MERGE_CLASS_NODE; MergeClass->Label = (char*)Emalloc (strlen (Label)+1); @@ -726,7 +713,7 @@ MERGE_CLASS NewLabeledClass ( MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS); return (MergeClass); -} /* NewLabeledClass */ +} /* NewLabeledClass */ /*-----------------------------------------------------------------------------*/ /** @@ -738,38 +725,36 @@ MERGE_CLASS NewLabeledClass ( * @note Exceptions: none * @note History: Fri Aug 18 17:44:27 1989, DSJ, Created. */ -void FreeLabeledClassList ( - LIST ClassList) -{ - MERGE_CLASS MergeClass; +void FreeLabeledClassList(LIST ClassList) { + MERGE_CLASS MergeClass; - iterate (ClassList) /* iterate through all of the fonts */ + iterate(ClassList) /* iterate through all of the fonts */ { MergeClass = (MERGE_CLASS) first_node (ClassList); free (MergeClass->Label); FreeClass(MergeClass->Class); delete MergeClass; } - destroy (ClassList); + destroy(ClassList); -} /* FreeLabeledClassList */ +} /* FreeLabeledClassList */ /* SetUpForFloat2Int */ CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset, LIST LabeledClassList) { - MERGE_CLASS MergeClass; - CLASS_TYPE Class; - int NumProtos; - int NumConfigs; - int NumWords; - int i, j; - float Values[3]; - PROTO NewProto; - PROTO OldProto; - BIT_VECTOR NewConfig; - BIT_VECTOR OldConfig; - - // printf("Float2Int ...\n"); + MERGE_CLASS MergeClass; + CLASS_TYPE Class; + int NumProtos; + int NumConfigs; + int NumWords; + int i, j; + float Values[3]; + PROTO NewProto; + PROTO OldProto; + BIT_VECTOR NewConfig; + BIT_VECTOR OldConfig; + + // printf("Float2Int ...\n"); CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()]; iterate(LabeledClassList) @@ -835,20 +820,19 @@ void Normalize ( } // Normalize 
/*-------------------------------------------------------------------------*/ -void FreeNormProtoList ( - LIST CharList) +void FreeNormProtoList(LIST CharList) { - LABELEDLIST char_sample; + LABELEDLIST char_sample; - iterate (CharList) /* iterate through all of the fonts */ + iterate(CharList) /* iterate through all of the fonts */ { char_sample = (LABELEDLIST) first_node (CharList); FreeLabeledList (char_sample); } - destroy (CharList); + destroy(CharList); -} // FreeNormProtoList +} // FreeNormProtoList /*---------------------------------------------------------------------------*/ void AddToNormProtosList( @@ -869,19 +853,16 @@ void AddToNormProtosList( } /*---------------------------------------------------------------------------*/ -int NumberOfProtos( - LIST ProtoList, - BOOL8 CountSigProtos, - BOOL8 CountInsigProtos) -{ +int NumberOfProtos(LIST ProtoList, BOOL8 CountSigProtos, + BOOL8 CountInsigProtos) { int N = 0; - PROTOTYPE *Proto; + PROTOTYPE* Proto; iterate(ProtoList) { Proto = (PROTOTYPE *) first_node ( ProtoList ); - if (( Proto->Significant && CountSigProtos ) || - ( ! 
Proto->Significant && CountInsigProtos ) ) + if ((Proto->Significant && CountSigProtos) || + (!Proto->Significant && CountInsigProtos)) N++; } return(N); diff --git a/training/stringrenderer.h b/training/stringrenderer.h index 942b7fddce..f0ba0c0b00 100644 --- a/training/stringrenderer.h +++ b/training/stringrenderer.h @@ -90,7 +90,7 @@ class StringRenderer { void set_underline_style(const PangoUnderline style) { underline_style_ = style; } - void set_features(const char *features) { + void set_features(const char* features) { free(features_); features_ = strdup(features); } @@ -130,12 +130,8 @@ class StringRenderer { const PangoFontInfo& font() const { return font_; } - int h_margin() const { - return h_margin_; - } - int v_margin() const { - return v_margin_; - } + int h_margin() const { return h_margin_; } + int v_margin() const { return v_margin_; } // Get the boxchars of all clusters rendered thus far (or since the last call // to ClearBoxes()). @@ -148,6 +144,9 @@ class StringRenderer { void RotatePageBoxes(float rotation); // Delete all boxes. void ClearBoxes(); + // Returns the boxes in a boxfile string. + string GetBoxesStr(); + // Writes the boxes to a boxfile. void WriteAllBoxes(const string& filename); // Removes space-delimited words from the string that are not renderable by // the current font and returns the count of such words. @@ -189,7 +188,7 @@ class StringRenderer { double underline_start_prob_; double underline_continuation_prob_; PangoUnderline underline_style_; - char *features_; + char* features_; // Text filtering options bool drop_uncovered_chars_; bool strip_unrenderable_words_; @@ -211,7 +210,7 @@ class StringRenderer { Boxa* page_boxes_; // Objects cached for subsequent calls to RenderAllFontsToImage() - hash_map char_map_; // Time-saving char histogram. + TessHashMap char_map_; // Time-saving char histogram. int total_chars_; // Number in the string to be rendered. int font_index_; // Index of next font to use in font list. 
int last_offset_; // Offset returned from last successful rendering diff --git a/training/text2image.cpp b/training/text2image.cpp index 406669dc10..946f6facda 100644 --- a/training/text2image.cpp +++ b/training/text2image.cpp @@ -251,6 +251,8 @@ void ExtractFontProperties(const string &utf8_text, // the input consists of the separated characters. NOTE(ranjith): As per // behdad@ this is not currently controllable at the level of the Pango // API. + // The most frequent of all is a single character "word" made by the CJK + // segmenter. // Safeguard against these cases here by just skipping the bigram. if (IsWhitespaceBox(boxes[b+1])) { continue; @@ -445,7 +447,7 @@ int main(int argc, char** argv) { string pango_name; if (!FontUtils::IsAvailableFont(FLAGS_font.c_str(), &pango_name)) { tprintf("Could not find font named %s.\n", FLAGS_font.c_str()); - if (!pango_name.empty()) { + if (!pango_name.empty()) { tprintf("Pango suggested font %s.\n", pango_name.c_str()); } tprintf("Please correct --font arg.\n"); @@ -523,7 +525,7 @@ int main(int argc, char** argv) { if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() && !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) { tprintf("Failed to load unicharset from file %s\n", - FLAGS_unicharset_file.c_str()); + FLAGS_unicharset_file.c_str()); exit(1); } @@ -604,7 +606,8 @@ int main(int argc, char** argv) { rotation = -1 * page_rotation[page_num]; } if (FLAGS_degrade_image) { - pix = DegradeImage(pix, FLAGS_exposure, &randomizer, FLAGS_rotate_image ? &rotation : NULL); + pix = DegradeImage(pix, FLAGS_exposure, &randomizer, + FLAGS_rotate_image ? 
&rotation : NULL); } render.RotatePageBoxes(rotation); diff --git a/viewer/scrollview.cpp b/viewer/scrollview.cpp index ac059d5469..f10b789ea8 100644 --- a/viewer/scrollview.cpp +++ b/viewer/scrollview.cpp @@ -37,7 +37,7 @@ #include "scrollview.h" #ifdef _MSC_VER -#pragma warning(disable:4786) // Don't give stupid warnings for stl +#pragma warning(disable:4786) // Don't give irrelevant warnings for stl #pragma warning(disable:4018) // signed/unsigned warnings #pragma warning(disable:4530) // exception warnings #endif diff --git a/viewer/svutil.h b/viewer/svutil.h index ccfce917fe..667c052083 100644 --- a/viewer/svutil.h +++ b/viewer/svutil.h @@ -26,6 +26,7 @@ #ifdef _WIN32 #ifndef __GNUC__ +#include "platform.h" #include #if defined(_MSC_VER) && _MSC_VER < 1900 #define snprintf _snprintf @@ -102,6 +103,17 @@ class SVMutex { #endif }; +// Auto-unlocking object that locks a mutex on construction and unlocks it +// on destruction. +class SVAutoLock { + public: + explicit SVAutoLock(SVMutex* mutex) : mutex_(mutex) { mutex->Lock(); } + ~SVAutoLock() { mutex_->Unlock(); } + + private: + SVMutex* mutex_; +}; + /// The SVNetwork class takes care of the remote connection for ScrollView /// This means setting up and maintaining a remote connection, sending and /// receiving messages and closing the connection. 
diff --git a/wordrec/associate.h b/wordrec/associate.h index 3d6fc44708..10b1e0b7d9 100644 --- a/wordrec/associate.h +++ b/wordrec/associate.h @@ -47,9 +47,7 @@ struct AssociateStats { gap_sum = 0; } - void Print() { - tprintf("AssociateStats: w(%g %d)\n", shape_cost, bad_shape); - } + void Print() { tprintf("AssociateStats: s(%g %d)\n", shape_cost, bad_shape); } float shape_cost; // cost of blob shape bool bad_shape; // true if the shape of the blob is unacceptable diff --git a/wordrec/lm_state.h b/wordrec/lm_state.h index 623bbb5e7f..6229e9b350 100644 --- a/wordrec/lm_state.h +++ b/wordrec/lm_state.h @@ -48,8 +48,8 @@ typedef unsigned char LanguageModelFlagsType; /// Each ViterbiStateEntry contains information from various components of the /// language model: dawgs in which the path is found, character ngram model /// probability of the path, script/chartype/font consistency info, state for -/// language-specific heuristics (e.g. hyphenated and compound words, lower/upper -/// case preferences, etc). +/// language-specific heuristics (e.g. hyphenated and compound words, +/// lower/upper case preferences, etc). /// /// Each ViterbiStateEntry also contains the parent pointer, so that the path /// that it represents (WERD_CHOICE) can be constructed by following these @@ -165,13 +165,13 @@ struct ViterbiStateEntry : public ELIST_LINK { /// Various information about the characters on the path represented /// by this ViterbiStateEntry. 
- float ratings_sum; //< sum of ratings of character on the path - float min_certainty; //< minimum certainty on the path - int adapted; //< number of BLOB_CHOICES from adapted templates - int length; //< number of characters on the path + float ratings_sum; //< sum of ratings of character on the path + float min_certainty; //< minimum certainty on the path + int adapted; //< number of BLOB_CHOICES from adapted templates + int length; //< number of characters on the path float outline_length; //< length of the outline so far LMConsistencyInfo consistency_info; //< path consistency info - AssociateStats associate_stats; //< character widths/gaps/seams + AssociateStats associate_stats; //< character widths/gaps/seams /// Flags for marking the entry as a top choice path with /// the smallest rating or lower/upper case letters). diff --git a/wordrec/measure.h b/wordrec/measure.h index 9c73906853..894938e55a 100644 --- a/wordrec/measure.h +++ b/wordrec/measure.h @@ -60,10 +60,9 @@ typedef struct * Add one more sample to a measurement. **********************************************************************/ -#define ADD_SAMPLE(m,s) \ -(m.sum_of_samples += (float) (s), \ - m.sum_of_squares += (float) (s) * (float) (s), \ - ++m.num_samples) +#define ADD_SAMPLE(m, s) \ + (m.sum_of_samples += (float)(s), \ + m.sum_of_squares += (float)(s) * (float)(s), ++m.num_samples) /********************************************************************** * mean @@ -71,10 +70,8 @@ typedef struct * Return the mean value of the measurement. **********************************************************************/ -#define MEAN(m) \ -((m).num_samples ? \ - ((float) ((m).sum_of_samples / (m).num_samples)) : \ - 0) +#define MEAN(m) \ + ((m).num_samples ? ((float)((m).sum_of_samples / (m).num_samples)) : 0) /********************************************************************** * new_measurement @@ -83,10 +80,8 @@ typedef struct * samples. 
**********************************************************************/ -#define new_measurement(m) \ -((m).num_samples = 0, \ - (m).sum_of_samples = 0, \ - (m).sum_of_squares = 0) +#define new_measurement(m) \ + ((m).num_samples = 0, (m).sum_of_samples = 0, (m).sum_of_squares = 0) /********************************************************************** * number_of_samples @@ -112,13 +107,12 @@ typedef struct * Return the variance of the measurement. **********************************************************************/ -#define VARIANCE(m) \ -(((m).num_samples > 1) ? \ - ((float) \ - (((m).num_samples * (m).sum_of_squares - \ - (m).sum_of_samples * (m).sum_of_samples) / \ - (((m).num_samples - 1) * (m).num_samples))) : \ - 0) +#define VARIANCE(m) \ + (((m).num_samples > 1) \ + ? ((float)(((m).num_samples * (m).sum_of_squares - \ + (m).sum_of_samples * (m).sum_of_samples) / \ + (((m).num_samples - 1) * (m).num_samples))) \ + : 0) /********************************************************************** * print_summary @@ -126,10 +120,8 @@ typedef struct * Summarize a MEASUREMENT record. 
**********************************************************************/ -#define print_summary(string,measure) \ -cprintf ("\t%-20s \tn = %d, \tm = %4.2f, \ts = %4.2f\n ", \ - string, \ - number_of_samples (measure), \ - MEAN (measure), \ - standard_deviation (measure)) +#define print_summary(string, measure) \ + cprintf("\t%-20s \tn = %d, \tm = %4.2f, \ts = %4.2f\n ", string, \ + number_of_samples(measure), MEAN(measure), \ + standard_deviation(measure)) #endif diff --git a/wordrec/pieces.cpp b/wordrec/pieces.cpp index 04e340396e..f7b406d5bc 100644 --- a/wordrec/pieces.cpp +++ b/wordrec/pieces.cpp @@ -267,7 +267,6 @@ void Wordrec::merge_and_put_fragment_lists(inT16 row, inT16 column, delete [] choice_lists_it; } - /********************************************************************** * get_fragment_lists * From bf0f9013efa5a01b918da2dcb370317e0659a65c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 25 Nov 2016 15:16:39 +0100 Subject: [PATCH 023/132] add license info to autogen.sh --- autogen.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/autogen.sh b/autogen.sh index ac44d35770..a551bb4b4d 100755 --- a/autogen.sh +++ b/autogen.sh @@ -1,4 +1,13 @@ #!/bin/sh +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# This is a simple script which is meant to help developers # better deal with the GNU autotools, specifically: From 6dcafe64f85f9f3de02585c5be79da64ea847b4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Sun, 27 Nov 2016 21:26:29 +0100 Subject: [PATCH 024/132] revert TessHashMap to hash_map in training/stringrenderer.h from 90651e1 (to fix build) --- training/stringrenderer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/stringrenderer.h b/training/stringrenderer.h index f0ba0c0b00..9b72cf6e3a 100644 --- a/training/stringrenderer.h +++ b/training/stringrenderer.h @@ -210,7 +210,7 @@ class StringRenderer { Boxa* page_boxes_; // Objects cached for subsequent calls to RenderAllFontsToImage() - TessHashMap char_map_; // Time-saving char histogram. + hash_map char_map_; // Time-saving char histogram. int total_chars_; // Number in the string to be rendered. int font_index_; // Index of next font to use in font list. int last_offset_; // Offset returned from last successful rendering From 0dccbeddcb7495555a126cc150fd951814a534df Mon Sep 17 00:00:00 2001 From: Amit D Date: Sun, 27 Nov 2016 00:04:05 +0200 Subject: [PATCH 025/132] AUTHORS: Add more contributors and fix typo --- AUTHORS | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 0615b38a22..4d9c75c4aa 100644 --- a/AUTHORS +++ b/AUTHORS @@ -9,6 +9,7 @@ Simon Crouch David Eger Sheelagh Huddleston Dan Johnson +Rajesh Katikam Thomas Kielbus Dar-Shyang Lee Zongyi (Joe) Liu @@ -28,13 +29,14 @@ Ping Ping Xiu Andrew Ziem Oscar Zuniga -Community Contributers: +Community Contributors: Zdenko Podobný (Maintainer) Jim Regan (Maintainer) James R Barlow Amit Dovev Martin Ettl Tom Morris +Tobias Müller Egor Pugin Sundar M. 
Vaidya Stefan Weil From 6d47e9027dc7284bb83441162c624a120e8f07bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Sun, 27 Nov 2016 21:39:23 +0100 Subject: [PATCH 026/132] use leptonica from master git repository (1.74) --- cppan.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cppan.yml b/cppan.yml index 98738d91ed..8065da79e4 100644 --- a/cppan.yml +++ b/cppan.yml @@ -121,4 +121,4 @@ dependencies: private: # tesseract uses leptonica only internally # and does not expose its interface to users - pvt.cppan.demo.leptonica: 1.73 + pvt.cppan.demo.leptonica: master From fe9fa88f1ddc167d2179583373759dddcc7950a9 Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Mon, 28 Nov 2016 08:55:03 -0800 Subject: [PATCH 027/132] Missing pdf font file from previous sync --- tessdata/pdf.ttf | Bin 572 -> 572 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tessdata/pdf.ttf b/tessdata/pdf.ttf index 578974a9e872e6435350496b2a4619d4277a5b70..8affa23180a49f38ab66b095fabce1c299a7ee93 100644 GIT binary patch delta 67 zcmdnPvWI1YgAkWd=0OGqhBZKJmXVQ~$ip--C`o3w?1Zp*ew(i>>>NM|28Q#q{^Wt^ T%e4xA6E8?GaWHIr6~qVtc*zqY delta 67 zcmdnPvWI1YgAnHg4j%>vhBZKJmXVQ~$ih4^C`sl_La;+Tzs*+`b`GEf1H<`QfAT=| TrCa-MPP`z&#K5@mRS+WpZ+jFW From 8a7de59f5d72b4579e64ee7dab1eb36affe294c5 Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Mon, 28 Nov 2016 09:39:17 -0800 Subject: [PATCH 028/132] Fixed the memory leak/double free cleanly --- classify/adaptmatch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 6e995c2159..c02efed7aa 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -819,7 +819,7 @@ int Classify::GetAdaptiveFeatures(TBLOB *Blob, Features = ExtractPicoFeatures(Blob); NumFeatures = Features->NumFeatures; - if (NumFeatures > UNLIKELY_NUM_FEAT) { + if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) { FreeFeatureSet(Features); return 0; } From 
7169545a86e32f215653f2ed09bbb2adaddbe499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Tue, 29 Nov 2016 11:16:10 +0100 Subject: [PATCH 029/132] fix code style --- api/baseapi.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 55c7892c6e..967e4ee802 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1559,8 +1559,12 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) { } switch (res_it->WordDirection()) { // Only emit direction if different from current paragraph direction - case DIR_LEFT_TO_RIGHT: if (!para_is_ltr) hocr_str += " dir='ltr'"; break; - case DIR_RIGHT_TO_LEFT: if (para_is_ltr) hocr_str += " dir='rtl'"; break; + case DIR_LEFT_TO_RIGHT: + if (!para_is_ltr) hocr_str += " dir='ltr'"; + break; + case DIR_RIGHT_TO_LEFT: + if (para_is_ltr) hocr_str += " dir='rtl'"; + break; case DIR_MIX: case DIR_NEUTRAL: default: // Do nothing. From d01dd0bdd45e3f3c73396b4d7f40f1f79fa42351 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Tue, 29 Nov 2016 11:18:52 +0100 Subject: [PATCH 030/132] backport from 4.00: show PSM 11-13 --- api/tesseractmain.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 71a0a6a8b7..a6f766eed1 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -111,15 +111,11 @@ void PrintHelpForPSM() { " 8 Treat the image as a single word.\n" " 9 Treat the image as a single word in a circle.\n" " 10 Treat the image as a single character.\n" - //TODO: Consider publishing these modes. - #if 0 " 11 Sparse text. Find as much text as possible in no" " particular order.\n" " 12 Sparse text with OSD.\n" " 13 Raw line. 
Treat the image as a single text line,\n" - "\t\t\tbypassing hacks that are Tesseract-specific.\n" - #endif - ; + "\t\t\tbypassing hacks that are Tesseract-specific.\n"; printf("%s", msg); } From c8e2be63d0116d04f1c4baeae23849a4bd07fa2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Tue, 29 Nov 2016 11:21:21 +0100 Subject: [PATCH 031/132] backport from 4.00: fix pdfrenderer --- api/pdfrenderer.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/api/pdfrenderer.cpp b/api/pdfrenderer.cpp index dc90c5a3bf..80f211bb17 100644 --- a/api/pdfrenderer.cpp +++ b/api/pdfrenderer.cpp @@ -620,7 +620,6 @@ bool TessPDFRenderer::BeginDocumentHandler() { AppendPDFObject(buf); // FONT DESCRIPTOR - const int kCharHeight = 2; // Effect: highlights are half height n = snprintf(buf, sizeof(buf), "7 0 obj\n" "<<\n" @@ -636,10 +635,10 @@ bool TessPDFRenderer::BeginDocumentHandler() { " /Type /FontDescriptor\n" ">>\n" "endobj\n", - 1000 / kCharHeight, - 1000 / kCharHeight, + 1000, + 1000, 1000 / kCharWidth, - 1000 / kCharHeight, + 1000, 8L // Font data ); if (n >= sizeof(buf)) return false; From aa9be096a8f7f0587a480b87b4346780ba8b7f03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Tue, 29 Nov 2016 16:50:57 +0100 Subject: [PATCH 032/132] add License info to cmake files --- cmake/BuildFunctions.cmake | 9 +++++++++ cmake/Configure.cmake | 9 +++++++++ cmake/FindICU.cmake | 9 +++++++++ cmake/SourceGroups.cmake | 11 +++++++++++ 4 files changed, 38 insertions(+) diff --git a/cmake/BuildFunctions.cmake b/cmake/BuildFunctions.cmake index eea5a396cb..39fd6d7071 100644 --- a/cmake/BuildFunctions.cmake +++ b/cmake/BuildFunctions.cmake @@ -1,3 +1,12 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. ################################################################################ # # macros and functions diff --git a/cmake/Configure.cmake b/cmake/Configure.cmake index d7f4ac6ad5..bd5b80c5a7 100644 --- a/cmake/Configure.cmake +++ b/cmake/Configure.cmake @@ -1,3 +1,12 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. ################################################################################ # # configure diff --git a/cmake/FindICU.cmake b/cmake/FindICU.cmake index cd6bf9265c..8381c2eb84 100644 --- a/cmake/FindICU.cmake +++ b/cmake/FindICU.cmake @@ -1,3 +1,12 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. # This module can find the International Components for Unicode (ICU) Library # # Requirements: diff --git a/cmake/SourceGroups.cmake b/cmake/SourceGroups.cmake index ca87e808d3..7e79927e6c 100644 --- a/cmake/SourceGroups.cmake +++ b/cmake/SourceGroups.cmake @@ -1,3 +1,12 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. #include(SourceGroups) set(SSRC ${CMAKE_SOURCE_DIR}) @@ -14,6 +23,7 @@ set(H_CPP "(${H}|${CPP})") source_group("Resource files" ".*\\.(rc|ico)") source_group("api" "${SSRC}/api/${H_CPP}") +source_group("arch" "${SSRC}/arch/${H_CPP}") source_group("ccmain" "${SSRC}/ccmain/${H_CPP}") source_group("ccstruct" "${SSRC}/ccstruct/${H_CPP}") source_group("ccutil" "${SSRC}/ccutil/${H_CPP}") @@ -21,6 +31,7 @@ source_group("classify" "${SSRC}/classify/${H_CPP}") source_group("cube" "${SSRC}/cube/${H_CPP}") source_group("cutil" "${SSRC}/cutil/${H_CPP}") source_group("dict" "${SSRC}/dict/${H_CPP}") +source_group("lstm" "${SSRC}/lstm/${H_CPP}") source_group("neural" "${SSRC}/neural_networks/runtime/${H_CPP}") source_group("opencl" "${SSRC}/opencl/${H_CPP}") source_group("textord" "${SSRC}/textord/${H_CPP}") From 01cf9c25cf8713bb57fde24cb7b09e971b61dacc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Tue, 29 Nov 2016 16:51:12 +0100 Subject: [PATCH 033/132] increase GENERIC_MINOR_VERSION --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/configure.ac b/configure.ac index dd1cbba005..2e8883d05e 100644 --- a/configure.ac +++ b/configure.ac @@ -43,7 +43,7 @@ GENERIC_LIBRARY_NAME=tesseract # Release versioning GENERIC_MAJOR_VERSION=3 -GENERIC_MINOR_VERSION=4 +GENERIC_MINOR_VERSION=5 GENERIC_MICRO_VERSION=0 # API version (often = GENERIC_MAJOR_VERSION.GENERIC_MINOR_VERSION) From c778cd50c08ac8966eaf4216ee409d332580182a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Tue, 29 Nov 2016 16:56:04 +0100 Subject: [PATCH 034/132] backport from 4.00: changes in scrollviewer --- viewer/scrollview.h | 4 +-- viewer/svutil.cpp | 64 +++++++++++++++++++++++++-------------------- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/viewer/scrollview.h b/viewer/scrollview.h index 12fac4e5db..cf1e182efb 100644 --- a/viewer/scrollview.h +++ b/viewer/scrollview.h @@ -89,7 +89,7 @@ class SVEventHandler { // Gets called by the SV Window. Does nothing on default, overwrite this // to implement the desired behaviour - virtual void Notify(const SVEvent* sve) { } + virtual void Notify(const SVEvent* sve) { (void)sve; } }; // The ScrollView class provides the expernal API to the scrollviewer process. @@ -327,7 +327,7 @@ class ScrollView { // be unique among menubar eventIDs. void MenuItem(const char* parent, const char* name, int cmdEvent); -// This adds a new checkbox entry, which might initially be flagged. + // This adds a new checkbox entry, which might initially be flagged. 
void MenuItem(const char* parent, const char* name, int cmdEvent, bool flagged); diff --git a/viewer/svutil.cpp b/viewer/svutil.cpp index 80e11019e5..34a22861eb 100644 --- a/viewer/svutil.cpp +++ b/viewer/svutil.cpp @@ -22,6 +22,7 @@ #include #ifdef _WIN32 +#include struct addrinfo { struct sockaddr* ai_addr; int ai_addrlen; @@ -31,13 +32,13 @@ struct addrinfo { }; #else #include +#include #include #include #include #include #include #include -#include #include #include #ifdef __linux__ @@ -56,10 +57,34 @@ struct addrinfo { #include "config_auto.h" #endif -#ifndef GRAPHICS_DISABLED - #include "svutil.h" +SVMutex::SVMutex() { +#ifdef _WIN32 + mutex_ = CreateMutex(0, FALSE, 0); +#else + pthread_mutex_init(&mutex_, NULL); +#endif +} + +void SVMutex::Lock() { +#ifdef _WIN32 + WaitForSingleObject(mutex_, INFINITE); +#else + pthread_mutex_lock(&mutex_); +#endif +} + +void SVMutex::Unlock() { +#ifdef _WIN32 + ReleaseMutex(mutex_); +#else + pthread_mutex_unlock(&mutex_); +#endif +} + +#ifndef GRAPHICS_DISABLED + const int kMaxMsgSize = 4096; // Signals a thread to exit. @@ -161,29 +186,6 @@ void SVSemaphore::Wait() { #endif } -SVMutex::SVMutex() { -#ifdef _WIN32 - mutex_ = CreateMutex(0, FALSE, 0); -#else - pthread_mutex_init(&mutex_, NULL); -#endif -} - -void SVMutex::Lock() { -#ifdef _WIN32 - WaitForSingleObject(mutex_, INFINITE); -#else - pthread_mutex_lock(&mutex_); -#endif -} - -void SVMutex::Unlock() { -#ifdef _WIN32 - ReleaseMutex(mutex_); -#else - pthread_mutex_unlock(&mutex_); -#endif -} // Create new thread. @@ -200,7 +202,10 @@ void SVSync::StartThread(void *(*func)(void*), void* arg) { &threadid); // returns the thread identifier #else pthread_t helper; - pthread_create(&helper, NULL, func, arg); + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + pthread_create(&helper, &attr, func, arg); #endif } @@ -214,7 +219,7 @@ void SVNetwork::Send(const char* msg) { // Send the whole buffer. 
void SVNetwork::Flush() { mutex_send_->Lock(); - while (msg_buffer_out_.size() > 0) { + while (!msg_buffer_out_.empty()) { int i = send(stream_, msg_buffer_out_.c_str(), msg_buffer_out_.length(), 0); msg_buffer_out_.erase(0, i); } @@ -302,7 +307,8 @@ static std::string ScrollViewCommand(std::string scrollview_path) { const char* cmd_template = "-Djava.library.path=%s -jar %s/ScrollView.jar"; #else - const char* cmd_template = "-c \"trap 'kill %%1' 0 1 2 ; java " + const char* cmd_template = + "-c \"trap 'kill %%1' 0 1 2 ; java " "-Xms1024m -Xmx2048m -jar %s/ScrollView.jar" " & wait\""; #endif From 493312c06e67a75efb8cc25fe6440c4d13d5c155 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Tue, 29 Nov 2016 16:59:26 +0100 Subject: [PATCH 035/132] backport from 4.00: fix of destroy_nodes (oldlist.cpp) --- cutil/oldlist.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cutil/oldlist.cpp b/cutil/oldlist.cpp index 52c0d8680a..9e3f6f4c06 100644 --- a/cutil/oldlist.cpp +++ b/cutil/oldlist.cpp @@ -206,8 +206,8 @@ void destroy_nodes(LIST list, void_dest destructor) { destructor = memfree; while (list != NIL_LIST) { - (*destructor) (first_node (list)); - list = pop (list); + if (first_node(list) != NULL) (*destructor)(first_node(list)); + list = pop(list); } } @@ -401,7 +401,6 @@ LIST s_adjoin(LIST var_list, void *variable, int_compare compare) { return (push_last (var_list, variable)); } - /********************************************************************** * s e a r c h * From 382fdd52df3972dafcbb0f19749a47d4aae37be3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 30 Nov 2016 09:35:35 +0100 Subject: [PATCH 036/132] backport from 4.00: changes in cube --- cube/cube_search_object.cpp | 2 +- cube/hybrid_neural_net_classifier.cpp | 2 +- cube/word_unigrams.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cube/cube_search_object.cpp b/cube/cube_search_object.cpp index 
61294f26b6..731dd35276 100644 --- a/cube/cube_search_object.cpp +++ b/cube/cube_search_object.cpp @@ -400,7 +400,7 @@ bool CubeSearchObject::ComputeSpaceCosts() { float prob = 0.0; // gap is too small => no space - if (gap < min_spc_gap_) { + if (gap < min_spc_gap_ || max_spc_gap_ == min_spc_gap_) { prob = 0.0; } else if (gap > max_spc_gap_) { // gap is too big => definite space diff --git a/cube/hybrid_neural_net_classifier.cpp b/cube/hybrid_neural_net_classifier.cpp index 671a74acdf..9aa3026d8b 100644 --- a/cube/hybrid_neural_net_classifier.cpp +++ b/cube/hybrid_neural_net_classifier.cpp @@ -330,7 +330,7 @@ bool HybridNeuralNetCharClassifier::LoadNets(const string &data_file_path, // split into lines vector str_vec; CubeUtils::SplitStringUsing(str, "\r\n", &str_vec); - if (str_vec.size() <= 0) { + if (str_vec.empty()) { return false; } diff --git a/cube/word_unigrams.cpp b/cube/word_unigrams.cpp index 70cc9ee1e2..b92289d8e8 100644 --- a/cube/word_unigrams.cpp +++ b/cube/word_unigrams.cpp @@ -163,7 +163,7 @@ int WordUnigrams::Cost(const char_32 *key_str32, CubeUtils::SplitStringUsing(key_str, " \t", &words); // no words => no cost - if (words.size() <= 0) { + if (words.empty()) { return 0; } From 70ed782ab50bd5011a7ac07e509c9e208cd4aa49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?=
Date: Wed, 30 Nov 2016 10:27:02 +0100 Subject: [PATCH 037/132] style fix --- ccstruct/pageres.h | 2 +- ccutil/genericvector.h | 3 +-- classify/mastertrainer.cpp | 4 ++-- dict/dict.cpp | 3 +-- textord/tospace.cpp | 7 +++---- training/fileio.cpp | 5 +++-- training/pango_font_info.h | 14 +++++++------- training/stringrenderer.cpp | 7 +++---- training/tesstrain_utils.sh | 4 ++-- 9 files changed, 23 insertions(+), 26 deletions(-) diff --git a/ccstruct/pageres.h b/ccstruct/pageres.h index fc84d4d0ea..22c5ccb774 100644 --- a/ccstruct/pageres.h +++ b/ccstruct/pageres.h @@ -1,7 +1,7 @@ /********************************************************************** * File: pageres.h (Formerly page_res.h) * Description: Results classes used by control.c - * Author: Phil Cheatle + * Author: Phil Cheatle * Created: Tue Sep 22 08:42:49 BST 1992 * * (C) Copyright 1992, Hewlett-Packard Ltd. diff --git a/ccutil/genericvector.h b/ccutil/genericvector.h index d867d8929b..6d8187b09d 100644 --- a/ccutil/genericvector.h +++ b/ccutil/genericvector.h @@ -238,14 +238,13 @@ class GenericVector { int binary_search(const T& target) const { int bottom = 0; int top = size_used_; - do { + while (top - bottom > 1) { int middle = (bottom + top) / 2; if (data_[middle] > target) top = middle; else bottom = middle; } - while (top - bottom > 1); return bottom; } diff --git a/classify/mastertrainer.cpp b/classify/mastertrainer.cpp index 866a617a22..45b21d09ba 100644 --- a/classify/mastertrainer.cpp +++ b/classify/mastertrainer.cpp @@ -362,8 +362,8 @@ bool MasterTrainer::LoadFontInfo(const char* filename) { fontinfo.name = font_name; fontinfo.properties = 0; fontinfo.universal_id = 0; - if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, - &italic, &bold, &fixed, &serif, &fraktur) != 6) + if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, &italic, &bold, + &fixed, &serif, &fraktur) != 6) continue; fontinfo.properties = (italic << 0) + diff --git a/dict/dict.cpp b/dict/dict.cpp index 6503bf4de8..0ae4756a43 
100644 --- a/dict/dict.cpp +++ b/dict/dict.cpp @@ -30,7 +30,7 @@ namespace tesseract { class Image; -Dict::Dict(CCUtil* ccutil) +Dict::Dict(CCUtil *ccutil) : letter_is_okay_(&tesseract::Dict::def_letter_is_okay), probability_in_context_(&tesseract::Dict::def_probability_in_context), params_model_classify_(NULL), @@ -165,7 +165,6 @@ Dict::Dict(CCUtil* ccutil) getCCUtil()->params()), INT_MEMBER(max_permuter_attempts, 10000, "Maximum number of different" - " character choices to consider during permutation." " This limit is especially useful when user patterns" " are specified, since overly generic patterns can result in" diff --git a/textord/tospace.cpp b/textord/tospace.cpp index 8a290772af..933de2d5c8 100644 --- a/textord/tospace.cpp +++ b/textord/tospace.cpp @@ -462,10 +462,9 @@ void Textord::row_spacing_stats( MAX (row->kern_size, 2.5))); if (row->space_threshold > sane_threshold) { if (tosp_debug_level > 5) - tprintf ("B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n", - block_idx, row_idx, - row->kern_size, - row->space_threshold, row->space_size, sane_threshold); + tprintf("B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n", + block_idx, row_idx, row->kern_size, row->space_threshold, + row->space_size, sane_threshold); row->space_threshold = sane_threshold; if (row->space_size <= sane_threshold) row->space_size = row->space_threshold + 1.0f; diff --git a/training/fileio.cpp b/training/fileio.cpp index e3e43bd023..f82582da74 100644 --- a/training/fileio.cpp +++ b/training/fileio.cpp @@ -81,8 +81,9 @@ bool File::ReadFileToString(const string& filename, string* out) { } string File::JoinPath(const string& prefix, const string& suffix) { - return (!prefix.size() || prefix[prefix.size() - 1] == '/') ? - prefix + suffix : prefix + "/" + suffix; + return (!prefix.size() || prefix[prefix.size() - 1] == '/') + ? 
prefix + suffix + : prefix + "/" + suffix; } bool File::Delete(const char* pathname) { diff --git a/training/pango_font_info.h b/training/pango_font_info.h index 421139a163..f07d712f11 100644 --- a/training/pango_font_info.h +++ b/training/pango_font_info.h @@ -93,15 +93,15 @@ class PangoFontInfo { // Font Family name eg. "Arial" const string& family_name() const { return family_name_; } // Size in points (1/72"), rounded to the nearest integer. - int font_size() const { return font_size_; } - bool is_bold() const { return is_bold_; } - bool is_italic() const { return is_italic_; } - bool is_smallcaps() const { return is_smallcaps_; } - bool is_monospace() const { return is_monospace_; } - bool is_fraktur() const { return is_fraktur_; } + int font_size() const { return font_size_; } + bool is_bold() const { return is_bold_; } + bool is_italic() const { return is_italic_; } + bool is_smallcaps() const { return is_smallcaps_; } + bool is_monospace() const { return is_monospace_; } + bool is_fraktur() const { return is_fraktur_; } FontTypeEnum font_type() const { return font_type_; } - int resolution() const { return resolution_; } + int resolution() const { return resolution_; } void set_resolution(const int resolution) { resolution_ = resolution; } diff --git a/training/stringrenderer.cpp b/training/stringrenderer.cpp index fbff55fb11..66bbf7d28e 100644 --- a/training/stringrenderer.cpp +++ b/training/stringrenderer.cpp @@ -52,7 +52,7 @@ static const int kDefaultOutputResolution = 300; // Word joiner (U+2060) inserted after letters in ngram mode, as per // recommendation in http://unicode.org/reports/tr14/ to avoid line-breaks at // hyphens and other non-alpha characters. 
-static const char* kWordJoinerUTF8 = "\xE2\x81\xA0"; //u8"\u2060"; +static const char* kWordJoinerUTF8 = "\xE2\x81\xA0"; // u8"\u2060"; static const char32 kWordJoiner = 0x2060; static bool IsCombiner(int ch) { @@ -108,6 +108,7 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width, underline_start_prob_(0), underline_continuation_prob_(0), underline_style_(PANGO_UNDERLINE_SINGLE), + features_(NULL), drop_uncovered_chars_(true), strip_unrenderable_words_(false), add_ligatures_(false), @@ -120,7 +121,6 @@ StringRenderer::StringRenderer(const string& font_desc, int page_width, box_padding_(0), total_chars_(0), font_index_(0), - features_(NULL), last_offset_(0) { pen_color_[0] = 0.0; pen_color_[1] = 0.0; @@ -209,8 +209,7 @@ void StringRenderer::SetLayoutProperties() { #if (PANGO_VERSION_MAJOR == 1 && PANGO_VERSION_MINOR >= 38) if (add_ligatures_) { set_features("liga, clig, dlig, hlig"); - PangoAttribute* feature_attr = - pango_attr_font_features_new(features_); + PangoAttribute* feature_attr = pango_attr_font_features_new(features_); pango_attr_list_change(attr_list, feature_attr); } #endif diff --git a/training/tesstrain_utils.sh b/training/tesstrain_utils.sh index c45d00378d..48df3e4d59 100755 --- a/training/tesstrain_utils.sh +++ b/training/tesstrain_utils.sh @@ -90,8 +90,8 @@ parse_flags() { --) break;; --fontlist) - fn=0 - FONTS="" + fn=0 + FONTS="" while test $j -lt ${#ARGV[@]}; do test -z "${ARGV[$j]}" && break test `echo ${ARGV[$j]} | cut -c -2` = "--" && break From 6234da6686eaa0a56337bb64bef4817f87b89949 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?=
Date: Wed, 30 Nov 2016 10:27:02 +0100 Subject: [PATCH 038/132] backport from 4.00: changes in wordrec + FakeWordFromRatings --- ccstruct/pageres.cpp | 4 ++-- ccstruct/pageres.h | 2 +- wordrec/chopper.cpp | 2 +- wordrec/language_model.cpp | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ccstruct/pageres.cpp b/ccstruct/pageres.cpp index b66e5636ff..d7f0edaf80 100644 --- a/ccstruct/pageres.cpp +++ b/ccstruct/pageres.cpp @@ -882,14 +882,14 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) { choice_it.add_after_then_move(choices[c]); ratings->put(c, c, choice_list); } - FakeWordFromRatings(); + FakeWordFromRatings(TOP_CHOICE_PERM); reject_map.initialise(blob_count); done = true; } // Creates a WERD_CHOICE for the word using the top choices from the leading // diagonal of the ratings matrix. -void WERD_RES::FakeWordFromRatings() { +void WERD_RES::FakeWordFromRatings(PermuterType permuter) { int num_blobs = ratings->dimension(); WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs); word_choice->set_permuter(TOP_CHOICE_PERM); diff --git a/ccstruct/pageres.h b/ccstruct/pageres.h index 22c5ccb774..446bd7240d 100644 --- a/ccstruct/pageres.h +++ b/ccstruct/pageres.h @@ -590,7 +590,7 @@ class WERD_RES : public ELIST_LINK { // Creates a WERD_CHOICE for the word using the top choices from the leading // diagonal of the ratings matrix. - void FakeWordFromRatings(); + void FakeWordFromRatings(PermuterType permuter); // Copies the best_choice strings to the correct_text for adaption/training. void BestChoiceToCorrectText(); diff --git a/wordrec/chopper.cpp b/wordrec/chopper.cpp index 69a458bc2c..850cfcabda 100644 --- a/wordrec/chopper.cpp +++ b/wordrec/chopper.cpp @@ -426,7 +426,7 @@ void Wordrec::chop_word_main(WERD_RES *word) { if (word->best_choice == NULL) { // SegSearch found no valid paths, so just use the leading diagonal. 
- word->FakeWordFromRatings(); + word->FakeWordFromRatings(TOP_CHOICE_PERM); } word->RebuildBestState(); // If we finished without a hyphen at the end of the word, let the next word diff --git a/wordrec/language_model.cpp b/wordrec/language_model.cpp index f0e3be66f2..361fb5c585 100644 --- a/wordrec/language_model.cpp +++ b/wordrec/language_model.cpp @@ -32,7 +32,7 @@ #include "params.h" #include "params_training_featdef.h" -#if defined(_MSC_VER) || defined(ANDROID) +#if (defined(_MSC_VER) && _MSC_VER < 1900) || defined(ANDROID) double log2(double n) { return log(n) / log(2.0); } From 5882261969509e8bbc0bf67f406d25b194721c6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 30 Nov 2016 10:27:02 +0100 Subject: [PATCH 039/132] backport from 4.00: use "const TBOX&" instead of "TBOX box" in textord --- textord/alignedblob.cpp | 2 +- textord/bbgrid.cpp | 4 ++-- textord/ccnontextdetect.cpp | 2 +- textord/colpartition.cpp | 4 ++-- textord/colpartitiongrid.cpp | 6 +++--- textord/imagefind.cpp | 4 ++-- textord/strokewidth.cpp | 4 ++-- textord/tabfind.cpp | 4 ++-- textord/tablefind.cpp | 10 +++++----- textord/tabvector.cpp | 6 +++--- textord/tordmain.cpp | 2 +- 11 files changed, 24 insertions(+), 24 deletions(-) diff --git a/textord/alignedblob.cpp b/textord/alignedblob.cpp index 007d4ad38f..0dab26bf8d 100644 --- a/textord/alignedblob.cpp +++ b/textord/alignedblob.cpp @@ -188,7 +188,7 @@ ScrollView* AlignedBlob::DisplayTabs(const char* window_name, gsearch.StartFullSearch(); BLOBNBOX* bbox; while ((bbox = gsearch.NextFullSearch()) != NULL) { - TBOX box = bbox->bounding_box(); + const TBOX& box = bbox->bounding_box(); int left_x = box.left(); int right_x = box.right(); int top_y = box.top(); diff --git a/textord/bbgrid.cpp b/textord/bbgrid.cpp index 06114748f5..4cadcdcf2e 100644 --- a/textord/bbgrid.cpp +++ b/textord/bbgrid.cpp @@ -231,7 +231,7 @@ Pix* GridReducedPix(const TBOX& box, int gridsize, // Note that the Pix is used upside-down, with (0, 0) being the
bottom-left. Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, ICOORD bleft, int* left, int* bottom) { - TBOX box = outline->bounding_box(); + const TBOX& box = outline->bounding_box(); Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom); int wpl = pixGetWpl(pix); l_uint32* data = pixGetData(pix); @@ -257,7 +257,7 @@ Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, // As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE. Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize, ICOORD bleft, int* left, int* bottom) { - TBOX box = block->bounding_box(); + const TBOX& box = block->bounding_box(); Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom); int wpl = pixGetWpl(pix); l_uint32* data = pixGetData(pix); diff --git a/textord/ccnontextdetect.cpp b/textord/ccnontextdetect.cpp index 1cb0e4c6c7..f6a7d8f41a 100644 --- a/textord/ccnontextdetect.cpp +++ b/textord/ccnontextdetect.cpp @@ -305,7 +305,7 @@ bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) { // Search the grid to see what intersects it. // Setup a Rectangle search for overlapping this blob. 
BlobGridSearch rsearch(this); - TBOX box = blob->bounding_box(); + const TBOX& box = blob->bounding_box(); rsearch.StartRectSearch(box); rsearch.SetUniqueMode(true); BLOBNBOX* neighbour; diff --git a/textord/colpartition.cpp b/textord/colpartition.cpp index effb5a9112..154a1acef1 100644 --- a/textord/colpartition.cpp +++ b/textord/colpartition.cpp @@ -918,7 +918,7 @@ void ColPartition::ComputeLimits() { for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { bbox = it.data(); if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) { - TBOX box = bbox->bounding_box(); + const TBOX& box = bbox->bounding_box(); int area = box.area(); top_stats.add(box.top(), area); bottom_stats.add(box.bottom(), area); @@ -2121,7 +2121,7 @@ void ColPartition::RefinePartnersByOverlap(bool upper, // Return true if bbox belongs better in this than other. bool ColPartition::ThisPartitionBetter(BLOBNBOX* bbox, const ColPartition& other) { - TBOX box = bbox->bounding_box(); + const TBOX& box = bbox->bounding_box(); // Margins take priority. int left = box.left(); int right = box.right(); diff --git a/textord/colpartitiongrid.cpp b/textord/colpartitiongrid.cpp index 4d703fbe7d..efc5aa0660 100644 --- a/textord/colpartitiongrid.cpp +++ b/textord/colpartitiongrid.cpp @@ -86,7 +86,7 @@ void ColPartitionGrid::HandleClick(int x, int y) { ColPartition* neighbour; FCOORD click(x, y); while ((neighbour = radsearch.NextRadSearch()) != NULL) { - TBOX nbox = neighbour->bounding_box(); + const TBOX& nbox = neighbour->bounding_box(); if (nbox.contains(click)) { tprintf("Block box:"); neighbour->bounding_box().print(); @@ -1037,7 +1037,7 @@ void ColPartitionGrid::ListFindMargins(ColPartitionSet** best_columns, ColPartition* part = part_it.data(); ColPartitionSet* columns = NULL; if (best_columns != NULL) { - TBOX part_box = part->bounding_box(); + const TBOX& part_box = part->bounding_box(); // Get the columns from the y grid coord. 
int grid_x, grid_y; GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); @@ -1569,7 +1569,7 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection( const TBOX& im_box, const FCOORD& rerotation, bool debug, const ColPartition& part, int* best_distance) { // Set up a rectangle search bounded by the part. - TBOX part_box = part.bounding_box(); + const TBOX& part_box = part.bounding_box(); TBOX search_box; ICOORD dist_scaling; ComputeSearchBoxAndScaling(direction, part_box, gridsize(), diff --git a/textord/imagefind.cpp b/textord/imagefind.cpp index 4f27779b8e..14442d52ed 100644 --- a/textord/imagefind.cpp +++ b/textord/imagefind.cpp @@ -1115,7 +1115,7 @@ static bool TestWeakIntersectedPart(const TBOX& im_box, ColPartition* part) { if (part->flow() < BTFT_STRONG_CHAIN) { // A weak partition intersects the box. - TBOX part_box = part->bounding_box(); + const TBOX& part_box = part->bounding_box(); if (im_box.contains(part_box)) { int area = part_box.area(); int intersect_area = IntersectArea(part_box, part_list); @@ -1180,7 +1180,7 @@ static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) { part->flow() == BTFT_STRONG_CHAIN) { // Text intersects the box. 
any_text_in_padded_rect = true; - TBOX part_box = part->bounding_box(); + const TBOX& part_box = part->bounding_box(); if (box->overlap(part_box)) { return true; } diff --git a/textord/strokewidth.cpp b/textord/strokewidth.cpp index 5d0fdc5133..059aa9b85e 100644 --- a/textord/strokewidth.cpp +++ b/textord/strokewidth.cpp @@ -393,7 +393,7 @@ void StrokeWidth::GradeBlobsIntoPartitions( } static void PrintBoxWidths(BLOBNBOX* neighbour) { - TBOX nbox = neighbour->bounding_box(); + const TBOX& nbox = neighbour->bounding_box(); tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%1.f\n", nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), neighbour->horz_stroke_width(), neighbour->vert_stroke_width(), @@ -1939,7 +1939,7 @@ ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name, gsearch.StartFullSearch(); BLOBNBOX* bbox; while ((bbox = gsearch.NextFullSearch()) != NULL) { - TBOX box = bbox->bounding_box(); + const TBOX& box = bbox->bounding_box(); int left_x = box.left(); int right_x = box.right(); int top_y = box.top(); diff --git a/textord/tabfind.cpp b/textord/tabfind.cpp index dc7a072b7d..30bad8bb3c 100644 --- a/textord/tabfind.cpp +++ b/textord/tabfind.cpp @@ -229,7 +229,7 @@ void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0, *gutter_width, box.top(), box.bottom()); if (gutter_bbox != NULL) { - TBOX gutter_box = gutter_bbox->bounding_box(); + const TBOX& gutter_box = gutter_bbox->bounding_box(); *gutter_width = left ? tab_x - gutter_box.right() : gutter_box.left() - tab_x; } @@ -261,7 +261,7 @@ void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height, int neighbour_edge = left ? 
RightEdgeForBox(box, true, false) : LeftEdgeForBox(box, true, false); if (neighbour != NULL) { - TBOX n_box = neighbour->bounding_box(); + const TBOX& n_box = neighbour->bounding_box(); if (debug) { tprintf("Found neighbour:"); n_box.print(); diff --git a/textord/tablefind.cpp b/textord/tablefind.cpp index 425bdbc218..b68655a57c 100644 --- a/textord/tablefind.cpp +++ b/textord/tablefind.cpp @@ -550,7 +550,7 @@ void TableFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks, // iterate through the source list for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { ColSegment* src_seg = src_it.data(); - TBOX src_box = src_seg->bounding_box(); + const TBOX& src_box = src_seg->bounding_box(); bool match_found = false; // iterate through the destination list to find a matching column block for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); dest_it.forward()) { @@ -1342,7 +1342,7 @@ void TableFinder::GetTableRegions(ColSegment_LIST* table_columns, // create a bool array to hold projection on y-axis bool* table_region = new bool[page_height]; while ((part = gsearch.NextFullSearch()) != NULL) { - TBOX part_box = part->bounding_box(); + const TBOX& part_box = part->bounding_box(); // reset the projection array for (int i = 0; i < page_height; i++) { table_region[i] = false; @@ -1974,7 +1974,7 @@ void TableFinder::DisplayColPartitionConnections( ColPartition* upper_part = part->nearest_neighbor_above(); if (upper_part) { - TBOX upper_box = upper_part->bounding_box(); + const TBOX& upper_box = upper_part->bounding_box(); int mid_x = (left_x + right_x) / 2; int mid_y = (top_y + bottom_y) / 2; int other_x = (upper_box.left() + upper_box.right()) / 2; @@ -1985,7 +1985,7 @@ void TableFinder::DisplayColPartitionConnections( } ColPartition* lower_part = part->nearest_neighbor_below(); if (lower_part) { - TBOX lower_box = lower_part->bounding_box(); + const TBOX& lower_box = lower_part->bounding_box(); int mid_x = (left_x + right_x) / 2; int mid_y = (top_y + 
bottom_y) / 2; int other_x = (lower_box.left() + lower_box.right()) / 2; @@ -2098,7 +2098,7 @@ void TableFinder::MakeTableBlocks(ColPartitionGrid* grid, table_search.StartFullSearch(); ColSegment* table; while ((table = table_search.NextFullSearch()) != NULL) { - TBOX table_box = table->bounding_box(); + const TBOX& table_box = table->bounding_box(); // Start a rect search on table_box GridSearch rectsearch(grid); diff --git a/textord/tabvector.cpp b/textord/tabvector.cpp index fcc64a7721..f3e99fa38f 100644 --- a/textord/tabvector.cpp +++ b/textord/tabvector.cpp @@ -435,7 +435,7 @@ bool TabVector::SimilarTo(const ICOORD& vertical, vsearch.StartVerticalSearch(left, right, top_y); BLOBNBOX* blob; while ((blob = vsearch.NextVerticalSearch(true)) != NULL) { - TBOX box = blob->bounding_box(); + const TBOX& box = blob->bounding_box(); if (box.top() > bottom_y) return true; // Nothing found. if (box.bottom() < top_y) @@ -806,7 +806,7 @@ bool TabVector::Fit(ICOORD vertical, bool force_parallel) { // Fit a line to all the boxes in the list. for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); - TBOX box = bbox->bounding_box(); + const TBOX& box = bbox->bounding_box(); int x1 = IsRightTab() ? box.right() : box.left(); ICOORD boxpt(x1, box.bottom()); linepoints.Add(boxpt); @@ -831,7 +831,7 @@ bool TabVector::Fit(ICOORD vertical, bool force_parallel) { int width_count = 0; for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { BLOBNBOX* bbox = it.data(); - TBOX box = bbox->bounding_box(); + const TBOX& box = bbox->bounding_box(); mean_width_ += box.width(); ++width_count; int x1 = IsRightTab() ? box.right() : box.left(); diff --git a/textord/tordmain.cpp b/textord/tordmain.cpp index f09a186d4f..0c433a1f27 100644 --- a/textord/tordmain.cpp +++ b/textord/tordmain.cpp @@ -360,7 +360,7 @@ void Textord::cleanup_nontext_block(BLOCK* block) { // Non-text blocks must contain at least one row. 
ROW_IT row_it(block->row_list()); if (row_it.empty()) { - TBOX box = block->bounding_box(); + const TBOX& box = block->bounding_box(); float height = box.height(); inT32 xstarts[2] = {box.left(), box.right()}; double coeffs[3] = {0.0, 0.0, static_cast(box.bottom())}; From ed0c60bc651712a537ca4952702def70e4794230 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 30 Nov 2016 10:27:02 +0100 Subject: [PATCH 040/132] backport from 4.00: use ".empty()" instead of ".size() > 0" --- api/baseapi.cpp | 2 +- ccmain/control.cpp | 2 +- ccmain/paragraphs.cpp | 12 ++++++------ classify/intfx.cpp | 2 +- classify/shapeclassifier.cpp | 2 +- textord/colpartitiongrid.cpp | 6 +++--- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 967e4ee802..b57bcd51b8 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -2762,7 +2762,7 @@ void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, INT_FX_RESULT_STRUCT fx_info; tesseract_->ExtractFeatures(*blob, false, &bl_features, &cn_features, &fx_info, &outline_counts); - if (cn_features.size() == 0 || cn_features.size() > MAX_NUM_INT_FEATURES) { + if (cn_features.empty() || cn_features.size() > MAX_NUM_INT_FEATURES) { *num_features = 0; return; // Feature extraction failed. } diff --git a/ccmain/control.cpp b/ccmain/control.cpp index 5953698932..cdc2bb5a6e 100644 --- a/ccmain/control.cpp +++ b/ccmain/control.cpp @@ -536,7 +536,7 @@ void Tesseract::bigram_correction_pass(PAGE_RES *page_res) { } } } - if (overrides_word1.size() >= 1) { + if (!overrides_word1.empty()) { // Excellent, we have some bigram matches.
if (EqualIgnoringCaseAndTerminalPunct(*w_prev->best_choice, *overrides_word1[best_idx]) && diff --git a/ccmain/paragraphs.cpp b/ccmain/paragraphs.cpp index 7459940dd0..c7d21a9192 100644 --- a/ccmain/paragraphs.cpp +++ b/ccmain/paragraphs.cpp @@ -2052,7 +2052,7 @@ void ConvertHypothesizedModelRunsToParagraphs( bool single_line_paragraph = false; SetOfModels models; rows[start].NonNullHypotheses(&models); - if (models.size() > 0) { + if (!models.empty()) { model = models[0]; if (rows[start].GetLineType(model) != LT_BODY) single_line_paragraph = true; @@ -2190,17 +2190,17 @@ void LeftoverSegments(const GenericVector &rows, SetOfModels models_w_crowns; rows[i].StrongHypotheses(&models); rows[i].NonNullHypotheses(&models_w_crowns); - if (models.empty() && models_w_crowns.size() > 0) { + if (models.empty() && !models_w_crowns.empty()) { // Crown paragraph. Is it followed by a modeled line? for (int end = i + 1; end < rows.size(); end++) { SetOfModels end_models; SetOfModels strong_end_models; rows[end].NonNullHypotheses(&end_models); rows[end].StrongHypotheses(&strong_end_models); - if (end_models.size() == 0) { + if (end_models.empty()) { needs_fixing = true; break; - } else if (strong_end_models.size() > 0) { + } else if (!strong_end_models.empty()) { needs_fixing = false; break; } @@ -2485,7 +2485,7 @@ void InitializeRowInfo(bool after_recognition, info->ltr = ltr >= rtl; info->has_leaders = num_leaders > 3; info->num_words = werds.size(); - if (werds.size() > 0) { + if (!werds.empty()) { WERD_RES *lword = werds[0], *rword = werds[werds.size() - 1]; info->lword_text = lword->best_choice->unichar_string().string(); info->rword_text = rword->best_choice->unichar_string().string(); @@ -2538,7 +2538,7 @@ void DetectParagraphs(int debug_level, // If we're called before text recognition, we might not have // tight block bounding boxes, so trim by the minimum on each side. 
- if (row_infos.size() > 0) { + if (!row_infos.empty()) { int min_lmargin = row_infos[0].pix_ldistance; int min_rmargin = row_infos[0].pix_rdistance; for (int i = 1; i < row_infos.size(); i++) { diff --git a/classify/intfx.cpp b/classify/intfx.cpp index 78aa59bbc9..9c9870a2e9 100644 --- a/classify/intfx.cpp +++ b/classify/intfx.cpp @@ -520,7 +520,7 @@ bool ExtractIntFeat(const TBLOB& blob, tesseract::Classify::ExtractFeatures(blob, nonlinear_norm, &bl_features, &cn_features, results, NULL); - if (bl_features.size() == 0 || cn_features.size() == 0 || + if (bl_features.empty() || cn_features.empty() || bl_features.size() > MAX_NUM_INT_FEATURES || cn_features.size() > MAX_NUM_INT_FEATURES) { return false; // Feature extraction failed. diff --git a/classify/shapeclassifier.cpp b/classify/shapeclassifier.cpp index a39c8a24bb..e0ee3373d8 100644 --- a/classify/shapeclassifier.cpp +++ b/classify/shapeclassifier.cpp @@ -176,7 +176,7 @@ void ShapeClassifier::UnicharPrintResults( for (int i = 0; i < results.size(); ++i) { tprintf("%g: c_id=%d=%s", results[i].rating, results[i].unichar_id, GetUnicharset().id_to_unichar(results[i].unichar_id)); - if (results[i].fonts.size() != 0) { + if (!results[i].fonts.empty()) { tprintf(" Font Vector:"); for (int f = 0; f < results[i].fonts.size(); ++f) { tprintf(" %d", results[i].fonts[f].fontinfo_id); diff --git a/textord/colpartitiongrid.cpp b/textord/colpartitiongrid.cpp index efc5aa0660..86c883280e 100644 --- a/textord/colpartitiongrid.cpp +++ b/textord/colpartitiongrid.cpp @@ -1376,7 +1376,7 @@ void ColPartitionGrid::FindMergeCandidates(const ColPartition* part, // combined box to see if anything else is inappropriately overlapped. if (!part_box.contains(c_box) && !c_box.contains(part_box)) { // Search the combined rectangle to see if anything new is overlapped. 
- // This is a preliminary test designed to quickly weed-out stupid + // This is a preliminary test designed to quickly weed-out poor // merge candidates that would create a big list of overlapped objects // for the squared-order overlap analysis. Eg. vertical and horizontal // line-like objects that overlap real text when merged: @@ -1619,10 +1619,10 @@ BlobRegionType ColPartitionGrid::SmoothInOneDirection( image_bias - htext_score >= kSmoothDecisionMargin && image_bias - vtext_score >= kSmoothDecisionMargin) { *best_distance = dists[NPT_IMAGE][0]; - if (dists[NPT_WEAK_VTEXT].size() > 0 && + if (!dists[NPT_WEAK_VTEXT].empty() && *best_distance > dists[NPT_WEAK_VTEXT][0]) *best_distance = dists[NPT_WEAK_VTEXT][0]; - if (dists[NPT_WEAK_HTEXT].size() > 0 && + if (!dists[NPT_WEAK_HTEXT].empty() && *best_distance > dists[NPT_WEAK_HTEXT][0]) *best_distance = dists[NPT_WEAK_HTEXT][0]; return BRT_POLYIMAGE; From e3236f44cef9f73561a43e69fa8024562176821c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?=
Date: Wed, 30 Nov 2016 10:27:02 +0100 Subject: [PATCH 041/132] backport from 4.00: changes in textord --- ccutil/hashfn.h | 28 +++++++++------------------- textord/baselinedetect.cpp | 3 ++- textord/bbgrid.h | 4 ++-- textord/colpartition.cpp | 8 ++++++-- textord/colpartition.h | 19 +++++++++++++++++++ textord/tordmain.cpp | 2 ++ textord/tospace.cpp | 7 ++++--- 7 files changed, 44 insertions(+), 27 deletions(-) diff --git a/ccutil/hashfn.h b/ccutil/hashfn.h index be211b0731..ec96932107 100644 --- a/ccutil/hashfn.h +++ b/ccutil/hashfn.h @@ -20,16 +20,15 @@ #ifndef HASHFN_H #define HASHFN_H -#ifdef USE_STD_NAMESPACE #if (__cplusplus >= 201103L) || defined(_MSC_VER) // Visual Studio #include #include -#define hash_map std::unordered_map -#if (_MSC_VER >= 1500 && _MSC_VER < 1600) // Visual Studio 2008 -using namespace std::tr1; +#if defined(_MSC_VER) && (_MSC_VER >= 1500 && _MSC_VER < 1600) // VS 2008 +#define TessHashMap std::tr1::unordered_map +#define TessHashSet std::tr1::unordered_set #else // _MSC_VER -using std::unordered_map; -using std::unordered_set; +#define TessHashMap std::unordered_map +#define TessHashSet std::unordered_set #include #define SmartPtr std::unique_ptr #define HAVE_UNIQUE_PTR @@ -41,23 +40,14 @@ using std::unordered_set; #include using __gnu_cxx::hash_map; using __gnu_cxx::hash_set; -#define unordered_map hash_map -#define unordered_set hash_set +#define TessHashMap __gnu_cxx::hash_map +#define TessHashSet __gnu_cxx::hash_set #else #include #include +#define TessHashMap hash_map +#define TessHashSet :hash_set #endif // gcc -#elif (__clang__) -#include -#include -#define hash_map std::unordered_map -#define unordered_set std::unordered_set -#else // USE_STD_NAMESPACE -#include -#include -#define unordered_map hash_map -#define unordered_set hash_set -#endif // USE_STD_NAMESPACE #ifndef HAVE_UNIQUE_PTR // Trivial smart ptr. Expand to add features of std::unique_ptr as required. 
diff --git a/textord/baselinedetect.cpp b/textord/baselinedetect.cpp index a2b0173949..9bbd999e15 100644 --- a/textord/baselinedetect.cpp +++ b/textord/baselinedetect.cpp @@ -850,7 +850,8 @@ void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr, Pix* pix_spline = pix_debug_ ? pixConvertTo32(pix_debug_) : NULL; for (int i = 0; i < blocks_.size(); ++i) { BaselineBlock* bl_block = blocks_[i]; - bl_block->PrepareForSplineFitting(page_tr, remove_noise); + if (enable_splines) + bl_block->PrepareForSplineFitting(page_tr, remove_noise); bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord); if (pix_spline) { bl_block->DrawPixSpline(pix_spline); diff --git a/textord/bbgrid.h b/textord/bbgrid.h index d16b902ecf..066b5bae1e 100644 --- a/textord/bbgrid.h +++ b/textord/bbgrid.h @@ -364,7 +364,7 @@ template class GridSearch { // An iterator over the list at (x_, y_) in the grid_. BBC_C_IT it_; // Set of unique returned elements used when unique_mode_ is true. - unordered_set > returns_; + TessHashSet > returns_; }; // Sort function to sort a BBC by bounding_box().left(). 
@@ -623,7 +623,7 @@ void BBGrid::DisplayBoxes(ScrollView* tab_win) { gsearch.StartFullSearch(); BBC* bbox; while ((bbox = gsearch.NextFullSearch()) != NULL) { - TBOX box = bbox->bounding_box(); + const TBOX& box = bbox->bounding_box(); int left_x = box.left(); int right_x = box.right(); int top_y = box.top(); diff --git a/textord/colpartition.cpp b/textord/colpartition.cpp index 154a1acef1..0d0b4ca39e 100644 --- a/textord/colpartition.cpp +++ b/textord/colpartition.cpp @@ -1181,8 +1181,8 @@ bool ColPartition::MarkAsLeaderIfMonospaced() { if (best_end == NULL) { tprintf("No path\n"); } else { - tprintf("Total cost = %d vs allowed %d\n", - best_end->total_cost(), blob_count); + tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(), + blob_count); } } delete [] projection; @@ -1632,6 +1632,10 @@ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright, ColPartition_LIST* used_parts) { if (block_parts->empty()) return NULL; // Nothing to do. + // If the block_parts are not in reading order, then it will make an invalid + // block polygon and bounding_box, so sort by bounding box now just to make + // sure. + block_parts->sort(&ColPartition::SortByBBox); ColPartition_IT it(block_parts); ColPartition* part = it.data(); PolyBlockType type = part->type(); diff --git a/textord/colpartition.h b/textord/colpartition.h index 5c941cce15..7fcbc0004e 100644 --- a/textord/colpartition.h +++ b/textord/colpartition.h @@ -704,6 +704,25 @@ class ColPartition : public ELIST2_LINK { // doing a SideSearch when you want things in the same page column. bool IsInSameColumnAs(const ColPartition& part) const; + // Sort function to sort by bounding box. 
+ static int SortByBBox(const void* p1, const void* p2) { + const ColPartition* part1 = + *reinterpret_cast(p1); + const ColPartition* part2 = + *reinterpret_cast(p2); + int mid_y1 = part1->bounding_box_.y_middle(); + int mid_y2 = part2->bounding_box_.y_middle(); + if ((part2->bounding_box_.bottom() <= mid_y1 && + mid_y1 <= part2->bounding_box_.top()) || + (part1->bounding_box_.bottom() <= mid_y2 && + mid_y2 <= part1->bounding_box_.top())) { + // Sort by increasing x. + return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle(); + } + // Sort by decreasing y. + return mid_y2 - mid_y1; + } + // Sets the column bounds. Primarily used in testing. void set_first_column(int column) { first_column_ = column; diff --git a/textord/tordmain.cpp b/textord/tordmain.cpp index 0c433a1f27..0eaf843ec3 100644 --- a/textord/tordmain.cpp +++ b/textord/tordmain.cpp @@ -251,6 +251,7 @@ void Textord::filter_blobs(ICOORD page_tr, // top right &block->noise_blobs, &block->small_blobs, &block->large_blobs); + if (block->line_size == 0) block->line_size = 1; block->line_spacing = block->line_size * (tesseract::CCStruct::kDescenderFraction + tesseract::CCStruct::kXHeightFraction + @@ -769,6 +770,7 @@ void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs, PointerVector word_ptrs; for (int g = 0; g < groups.size(); ++g) { const BlockGroup* group = groups[g]; + if (group->bounding_box.null_box()) continue; WordGrid word_grid(group->min_xheight, group->bounding_box.botleft(), group->bounding_box.topright()); for (int b = 0; b < group->blocks.size(); ++b) { diff --git a/textord/tospace.cpp b/textord/tospace.cpp index 933de2d5c8..4358436917 100644 --- a/textord/tospace.cpp +++ b/textord/tospace.cpp @@ -1332,9 +1332,10 @@ BOOL8 Textord::make_a_word_break( we may need to set PARTICULAR spaces to fuzzy or not. The values will ONLY be used if the function returns TRUE - ie the word is to be broken. 
*/ - blanks = (uinT8) (current_gap / row->space_size); - if (blanks < 1) - blanks = 1; + int num_blanks = current_gap; + if (row->space_size > 1.0f) + num_blanks = IntCastRounded(current_gap / row->space_size); + blanks = static_cast(ClipToRange(num_blanks, 1, MAX_UINT8)); fuzzy_sp = FALSE; fuzzy_non = FALSE; /* From 57d38b9270a75400abd433497f6a1fb48b07feb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 30 Nov 2016 12:59:42 +0100 Subject: [PATCH 042/132] backport from 4.00: changes in Android.mk --- android/jni/Android.mk | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 4624801469..fd0e5112f0 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -4,7 +4,7 @@ include $(CLEAR_VARS) LOCAL_MODULE := tesseract-$(APP_ABI) LOCAL_STATIC_LIBRARIES := \ - mobile_base \ + base \ leptonica-$(APP_ABI) LOCAL_C_INCLUDES := $(APP_C_INCLUDES) @@ -51,7 +51,6 @@ LOCAL_CFLAGS := -DANDROID_BUILD -DNO_CUBE_BUILD -DGRAPHICS_DISABLED include $(BUILD_SHARED_LIBRARY) -$(call import-module,mobile/base) -$(call import-module,mobile/base) +$(call import-module,base/port) $(call import-module,mobile/util/hash) $(call import-module,third_party/leptonica/android/jni) From 3b777159a427e6e966b4c68e60fbc7d3e2070bd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 30 Nov 2016 18:44:55 +0100 Subject: [PATCH 043/132] backport from 4.00: changes in classify --- classify/adaptmatch.cpp | 7 ++----- classify/intproto.cpp | 8 ++++---- classify/intproto.h | 2 +- classify/mastertrainer.cpp | 6 +++++- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index c02efed7aa..5ea8a49ce4 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -819,7 +819,7 @@ int Classify::GetAdaptiveFeatures(TBLOB *Blob, Features = ExtractPicoFeatures(Blob); NumFeatures = Features->NumFeatures; - if (NumFeatures == 
0 || NumFeatures > UNLIKELY_NUM_FEAT) { + if (NumFeatures > UNLIKELY_NUM_FEAT) { FreeFeatureSet(Features); return 0; } @@ -908,8 +908,7 @@ void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId, NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures); if (NumFeatures <= 0) { - FreeFeatureSet(FloatFeatures); - return; + return; // Features already freed by GetAdaptiveFeatures. } // Only match configs with the matching font. @@ -1008,8 +1007,6 @@ void Classify::DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class) { #endif } - - /** * This routine adds the result of a classification into * Results. If the new rating is much worse than the current diff --git a/classify/intproto.cpp b/classify/intproto.cpp index 3bbb2777a0..473ffd96db 100644 --- a/classify/intproto.cpp +++ b/classify/intproto.cpp @@ -46,9 +46,7 @@ #include "config_auto.h" #endif -using tesseract::FontInfo; using tesseract::FontSet; -using tesseract::FontSpacingInfo; /* match debug display constants*/ #define PROTO_PRUNER_SCALE (4.0) @@ -1708,8 +1706,10 @@ void InitTableFiller (FLOAT32 EndPad, FLOAT32 SidePad, /* translate into bucket positions and deltas */ Filler->X = Bucket8For(Start.x, XS, NB); - Filler->StartDelta = -(inT16) ((Sin / Cos) * 256); - Filler->EndDelta = (inT16) ((Cos / Sin) * 256); + Filler->StartDelta = static_cast(ClipToRange( + -IntCastRounded((Sin / Cos) * 256), MIN_INT16, MAX_INT16)); + Filler->EndDelta = static_cast(ClipToRange( + IntCastRounded((Cos / Sin) * 256), MIN_INT16, MAX_INT16)); XAdjust = BucketEnd(Filler->X, XS, NB) - Start.x; YAdjust = XAdjust * Sin / Cos; diff --git a/classify/intproto.h b/classify/intproto.h index d2c07147a9..262974b83e 100644 --- a/classify/intproto.h +++ b/classify/intproto.h @@ -52,7 +52,7 @@ class FCOORD; #define NUM_CP_BUCKETS 24 #define CLASSES_PER_CP 32 #define NUM_BITS_PER_CLASS 2 -#define CLASS_PRUNER_CLASS_MASK (~(~0 << NUM_BITS_PER_CLASS)) +#define CLASS_PRUNER_CLASS_MASK (~(~0u << 
NUM_BITS_PER_CLASS)) #define CLASSES_PER_CP_WERD (CLASSES_PER_CP / NUM_BITS_PER_CLASS) #define PROTOS_PER_PP_WERD BITS_PER_WERD #define BITS_PER_CP_VECTOR (CLASSES_PER_CP * NUM_BITS_PER_CLASS) diff --git a/classify/mastertrainer.cpp b/classify/mastertrainer.cpp index 45b21d09ba..cd7e93b9f2 100644 --- a/classify/mastertrainer.cpp +++ b/classify/mastertrainer.cpp @@ -363,8 +363,10 @@ bool MasterTrainer::LoadFontInfo(const char* filename) { fontinfo.properties = 0; fontinfo.universal_id = 0; if (tfscanf(fp, "%1024s %i %i %i %i %i\n", font_name, &italic, &bold, - &fixed, &serif, &fraktur) != 6) + &fixed, &serif, &fraktur) != 6) { + delete[] font_name; continue; + } fontinfo.properties = (italic << 0) + (bold << 1) + @@ -373,6 +375,8 @@ bool MasterTrainer::LoadFontInfo(const char* filename) { (fraktur << 4); if (!fontinfo_table_.contains(fontinfo)) { fontinfo_table_.push_back(fontinfo); + } else { + delete[] font_name; } } fclose(fp); From 9d1db80c69796ab4e9cbe8e8f887c9f4cf6a5356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 30 Nov 2016 19:29:59 +0100 Subject: [PATCH 044/132] backport from 4.00: changes from ccstruct excluding imagedata --- ccstruct/blobbox.cpp | 6 +- ccstruct/matrix.h | 309 +++++++++++++++++++++++++++++++++++++++++-- ccstruct/mod128.cpp | 13 -- ccstruct/mod128.h | 1 - ccstruct/pageres.cpp | 3 +- ccstruct/pageres.h | 3 + 6 files changed, 304 insertions(+), 31 deletions(-) diff --git a/ccstruct/blobbox.cpp b/ccstruct/blobbox.cpp index 47a625af32..3ffb9dc930 100644 --- a/ccstruct/blobbox.cpp +++ b/ccstruct/blobbox.cpp @@ -31,7 +31,9 @@ #define PROJECTION_MARGIN 10 //arbitrary #define EXTERN -ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK) +ELISTIZE(BLOBNBOX) +ELIST2IZE(TO_ROW) +ELISTIZE(TO_BLOCK) // Up to 30 degrees is allowed for rotations of diacritic blobs. 
const double kCosSmallAngle = 0.866; @@ -176,7 +178,7 @@ void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const { gaps[dir] = MAX_INT16; BLOBNBOX* neighbour = neighbours_[dir]; if (neighbour != NULL) { - TBOX n_box = neighbour->bounding_box(); + const TBOX& n_box = neighbour->bounding_box(); if (dir == BND_LEFT || dir == BND_RIGHT) { gaps[dir] = box.x_gap(n_box); } else { diff --git a/ccstruct/matrix.h b/ccstruct/matrix.h index 56d261cad7..4b5b242a43 100644 --- a/ccstruct/matrix.h +++ b/ccstruct/matrix.h @@ -1,10 +1,12 @@ /* -*-C-*- ****************************************************************************** - * * File: matrix.h (Formerly matrix.h) * Description: Generic 2-d array/matrix and banded triangular matrix class. * Author: Ray Smith - * Description: Ratings matrix code. (Used by associator) + * TODO(rays) Separate from ratings matrix, which it also contains: + * + * Description: Ratings matrix class (specialization of banded matrix). + * Segmentation search matrix of lists of BLOB_CHOICE. * Author: Mark Seaman, OCR Technology * Created: Wed May 16 13:22:06 1990 * Modified: Tue Mar 19 16:00:20 1991 (Mark Seaman) marks@hpgrlt @@ -27,9 +29,13 @@ #ifndef TESSERACT_CCSTRUCT_MATRIX_H__ #define TESSERACT_CCSTRUCT_MATRIX_H__ +#include #include "kdpair.h" +#include "points.h" +#include "serialis.h" #include "unicharset.h" +class BLOB_CHOICE; class BLOB_CHOICE_LIST; #define NOT_CLASSIFIED reinterpret_cast(0) @@ -46,34 +52,60 @@ class GENERIC_2D_ARRAY { // either pass the memory in, or allocate after by calling Resize(). GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty, T* array) : empty_(empty), dim1_(dim1), dim2_(dim2), array_(array) { + size_allocated_ = dim1 * dim2; } // Original constructor for a full rectangular matrix DOES allocate memory // and initialize it to empty. 
GENERIC_2D_ARRAY(int dim1, int dim2, const T& empty) : empty_(empty), dim1_(dim1), dim2_(dim2) { - array_ = new T[dim1_ * dim2_]; - for (int x = 0; x < dim1_; x++) - for (int y = 0; y < dim2_; y++) - this->put(x, y, empty_); + int new_size = dim1 * dim2; + array_ = new T[new_size]; + size_allocated_ = new_size; + for (int i = 0; i < size_allocated_; ++i) + array_[i] = empty_; + } + // Default constructor for array allocation. Use Resize to set the size. + GENERIC_2D_ARRAY() + : array_(NULL), empty_(static_cast(0)), dim1_(0), dim2_(0), + size_allocated_(0) { + } + GENERIC_2D_ARRAY(const GENERIC_2D_ARRAY& src) + : array_(NULL), empty_(static_cast(0)), dim1_(0), dim2_(0), + size_allocated_(0) { + *this = src; } virtual ~GENERIC_2D_ARRAY() { delete[] array_; } + void operator=(const GENERIC_2D_ARRAY& src) { + ResizeNoInit(src.dim1(), src.dim2()); + memcpy(array_, src.array_, num_elements() * sizeof(array_[0])); + } + + // Reallocate the array to the given size. Does not keep old data, but does + // not initialize the array either. + void ResizeNoInit(int size1, int size2) { + int new_size = size1 * size2; + if (new_size > size_allocated_) { + delete [] array_; + array_ = new T[new_size]; + size_allocated_ = new_size; + } + dim1_ = size1; + dim2_ = size2; + } + // Reallocate the array to the given size. Does not keep old data. void Resize(int size1, int size2, const T& empty) { empty_ = empty; - if (size1 != dim1_ || size2 != dim2_) { - dim1_ = size1; - dim2_ = size2; - delete [] array_; - array_ = new T[dim1_ * dim2_]; - } + ResizeNoInit(size1, size2); Clear(); } // Reallocate the array to the given size, keeping old data. 
void ResizeWithCopy(int size1, int size2) { if (size1 != dim1_ || size2 != dim2_) { - T* new_array = new T[size1 * size2]; + int new_size = size1 * size2; + T* new_array = new T[new_size]; for (int col = 0; col < size1; ++col) { for (int row = 0; row < size2; ++row) { int old_index = col * dim2() + row; @@ -89,6 +121,7 @@ class GENERIC_2D_ARRAY { array_ = new_array; dim1_ = size1; dim2_ = size2; + size_allocated_ = new_size; } } @@ -108,9 +141,16 @@ class GENERIC_2D_ARRAY { if (fwrite(array_, sizeof(*array_), size, fp) != size) return false; return true; } + bool Serialize(tesseract::TFile* fp) const { + if (!SerializeSize(fp)) return false; + if (fp->FWrite(&empty_, sizeof(empty_), 1) != 1) return false; + int size = num_elements(); + if (fp->FWrite(array_, sizeof(*array_), size) != size) return false; + return true; + } // Reads from the given file. Returns false in case of error. - // Only works with bitwise-serializeable typ + // Only works with bitwise-serializeable types! // If swap is true, assumes a big/little-endian swap is needed. bool DeSerialize(bool swap, FILE* fp) { if (!DeSerializeSize(swap, fp)) return false; @@ -124,6 +164,18 @@ class GENERIC_2D_ARRAY { } return true; } + bool DeSerialize(bool swap, tesseract::TFile* fp) { + if (!DeSerializeSize(swap, fp)) return false; + if (fp->FRead(&empty_, sizeof(empty_), 1) != 1) return false; + if (swap) ReverseN(&empty_, sizeof(empty_)); + int size = num_elements(); + if (fp->FRead(array_, sizeof(*array_), size) != size) return false; + if (swap) { + for (int i = 0; i < size; ++i) + ReverseN(&array_[i], sizeof(array_[i])); + } + return true; + } // Writes to the given file. Returns false in case of error. // Assumes a T::Serialize(FILE*) const function. @@ -165,11 +217,17 @@ class GENERIC_2D_ARRAY { } // Put a list element into the matrix at a specific location. 
+ void put(ICOORD pos, const T& thing) { + array_[this->index(pos.x(), pos.y())] = thing; + } void put(int column, int row, const T& thing) { array_[this->index(column, row)] = thing; } // Get the item at a specified location from the matrix. + T get(ICOORD pos) const { + return array_[this->index(pos.x(), pos.y())]; + } T get(int column, int row) const { return array_[this->index(column, row)]; } @@ -189,6 +247,207 @@ class GENERIC_2D_ARRAY { return &array_[this->index(column, 0)]; } + // Adds addend to *this, element-by-element. + void operator+=(const GENERIC_2D_ARRAY& addend) { + if (dim2_ == addend.dim2_) { + // Faster if equal size in the major dimension. + int size = MIN(num_elements(), addend.num_elements()); + for (int i = 0; i < size; ++i) { + array_[i] += addend.array_[i]; + } + } else { + for (int x = 0; x < dim1_; x++) { + for (int y = 0; y < dim2_; y++) { + (*this)(x, y) += addend(x, y); + } + } + } + } + // Subtracts minuend from *this, element-by-element. + void operator-=(const GENERIC_2D_ARRAY& minuend) { + if (dim2_ == minuend.dim2_) { + // Faster if equal size in the major dimension. + int size = MIN(num_elements(), minuend.num_elements()); + for (int i = 0; i < size; ++i) { + array_[i] -= minuend.array_[i]; + } + } else { + for (int x = 0; x < dim1_; x++) { + for (int y = 0; y < dim2_; y++) { + (*this)(x, y) -= minuend(x, y); + } + } + } + } + // Adds addend to all elements. + void operator+=(const T& addend) { + int size = num_elements(); + for (int i = 0; i < size; ++i) { + array_[i] += addend; + } + } + // Multiplies *this by factor, element-by-element. + void operator*=(const T& factor) { + int size = num_elements(); + for (int i = 0; i < size; ++i) { + array_[i] *= factor; + } + } + // Clips *this to the given range. 
+ void Clip(const T& rangemin, const T& rangemax) { + int size = num_elements(); + for (int i = 0; i < size; ++i) { + array_[i] = ClipToRange(array_[i], rangemin, rangemax); + } + } + // Returns true if all elements of *this are within the given range. + // Only uses operator< + bool WithinBounds(const T& rangemin, const T& rangemax) const { + int size = num_elements(); + for (int i = 0; i < size; ++i) { + const T& value = array_[i]; + if (value < rangemin || rangemax < value) + return false; + } + return true; + } + // Normalize the whole array. + double Normalize() { + int size = num_elements(); + if (size <= 0) return 0.0; + // Compute the mean. + double mean = 0.0; + for (int i = 0; i < size; ++i) { + mean += array_[i]; + } + mean /= size; + // Subtract the mean and compute the standard deviation. + double sd = 0.0; + for (int i = 0; i < size; ++i) { + double normed = array_[i] - mean; + array_[i] = normed; + sd += normed * normed; + } + sd = sqrt(sd / size); + if (sd > 0.0) { + // Divide by the sd. + for (int i = 0; i < size; ++i) { + array_[i] /= sd; + } + } + return sd; + } + + // Returns the maximum value of the array. + T Max() const { + int size = num_elements(); + if (size <= 0) return empty_; + // Compute the max. + T max_value = array_[0]; + for (int i = 1; i < size; ++i) { + const T& value = array_[i]; + if (value > max_value) max_value = value; + } + return max_value; + } + + // Returns the maximum absolute value of the array. + T MaxAbs() const { + int size = num_elements(); + if (size <= 0) return empty_; + // Compute the max. + T max_abs = static_cast(0); + for (int i = 0; i < size; ++i) { + T value = static_cast(fabs(array_[i])); + if (value > max_abs) max_abs = value; + } + return max_abs; + } + + // Accumulates the element-wise sums of squares of src into *this. 
+ void SumSquares(const GENERIC_2D_ARRAY& src) { + int size = num_elements(); + for (int i = 0; i < size; ++i) { + array_[i] += src.array_[i] * src.array_[i]; + } + } + + // Scales each element using the ada-grad algorithm, ie array_[i] by + // sqrt(num_samples/max(1,sqsum[i])). + void AdaGradScaling(const GENERIC_2D_ARRAY& sqsum, int num_samples) { + int size = num_elements(); + for (int i = 0; i < size; ++i) { + array_[i] *= sqrt(num_samples / MAX(1.0, sqsum.array_[i])); + } + } + + void AssertFinite() const { + int size = num_elements(); + for (int i = 0; i < size; ++i) { + ASSERT_HOST(isfinite(array_[i])); + } + } + + // REGARDLESS OF THE CURRENT DIMENSIONS, treats the data as a + // num_dims-dimensional array/tensor with dimensions given by dims, (ordered + // from most significant to least significant, the same as standard C arrays) + // and moves src_dim to dest_dim, with the initial dest_dim and any dimensions + // in between shifted towards the hole left by src_dim. Example: + // Current data content: array_=[0, 1, 2, ....119] + // perhaps *this may be of dim[40, 3], with values [[0, 1, 2][3, 4, 5]... + // but the current dimensions are irrelevant. + // num_dims = 4, dims=[5, 4, 3, 2] + // src_dim=3, dest_dim=1 + // tensor=[[[[0, 1][2, 3][4, 5]] + // [[6, 7][8, 9][10, 11]] + // [[12, 13][14, 15][16, 17]] + // [[18, 19][20, 21][22, 23]]] + // [[[24, 25]... + // output dims =[5, 2, 4, 3] + // output tensor=[[[[0, 2, 4][6, 8, 10][12, 14, 16][18, 20, 22]] + // [[1, 3, 5][7, 9, 11][13, 15, 17][19, 21, 23]]] + // [[[24, 26, 28]... + // which is stored in the array_ as: + // [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 1, 3, 5, 7, 9, 11, 13...] + // NOTE: the 2 stored matrix dimensions are simply copied from *this. To + // change the dimensions after the transpose, use ResizeNoInit. + // Higher dimensions above 2 are strictly the responsibility of the caller. 
+ void RotatingTranspose(const int* dims, int num_dims, int src_dim, + int dest_dim, GENERIC_2D_ARRAY* result) const { + int max_d = MAX(src_dim, dest_dim); + int min_d = MIN(src_dim, dest_dim); + // In a tensor of shape [d0, d1... min_d, ... max_d, ... dn-2, dn-1], the + // ends outside of min_d and max_d are unaffected, with [max_d +1, dn-1] + // being contiguous blocks of data that will move together, and + // [d0, min_d -1] being replicas of the transpose operation. + // num_replicas represents the large dimensions unchanged by the operation. + // move_size represents the small dimensions unchanged by the operation. + // src_step represents the stride in the src between each adjacent group + // in the destination. + int num_replicas = 1, move_size = 1, src_step = 1; + for (int d = 0; d < min_d; ++d) num_replicas *= dims[d]; + for (int d = max_d + 1; d < num_dims; ++d) move_size *= dims[d]; + for (int d = src_dim + 1; d < num_dims; ++d) src_step *= dims[d]; + if (src_dim > dest_dim) src_step *= dims[src_dim]; + // wrap_size is the size of a single replica, being the amount that is + // handled num_replicas times. + int wrap_size = move_size; + for (int d = min_d; d <= max_d; ++d) wrap_size *= dims[d]; + result->ResizeNoInit(dim1_, dim2_); + result->empty_ = empty_; + const T* src = array_; + T* dest = result->array_; + for (int replica = 0; replica < num_replicas; ++replica) { + for (int start = 0; start < src_step; start += move_size) { + for (int pos = start; pos < wrap_size; pos += src_step) { + memcpy(dest, src + pos, sizeof(*dest) * move_size); + dest += move_size; + } + } + src += wrap_size; + } + } + // Delete objects pointed to by array_[i]. 
void delete_matrix_pointers() { int size = num_elements(); @@ -208,6 +467,13 @@ class GENERIC_2D_ARRAY { if (fwrite(&size, sizeof(size), 1, fp) != 1) return false; return true; } + bool SerializeSize(tesseract::TFile* fp) const { + inT32 size = dim1_; + if (fp->FWrite(&size, sizeof(size), 1) != 1) return false; + size = dim2_; + if (fp->FWrite(&size, sizeof(size), 1) != 1) return false; + return true; + } // Factored helper to deserialize the size. // If swap is true, assumes a big/little-endian swap is needed. bool DeSerializeSize(bool swap, FILE* fp) { @@ -221,11 +487,26 @@ class GENERIC_2D_ARRAY { Resize(size1, size2, empty_); return true; } + bool DeSerializeSize(bool swap, tesseract::TFile* fp) { + inT32 size1, size2; + if (fp->FRead(&size1, sizeof(size1), 1) != 1) return false; + if (fp->FRead(&size2, sizeof(size2), 1) != 1) return false; + if (swap) { + ReverseN(&size1, sizeof(size1)); + ReverseN(&size2, sizeof(size2)); + } + Resize(size1, size2, empty_); + return true; + } T* array_; T empty_; // The unused cell. int dim1_; // Size of the 1st dimension in indexing functions. int dim2_; // Size of the 2nd dimension in indexing functions. + // The total size to which the array can be expanded before a realloc is + // needed. If Resize is used, memory is retained so it can be re-expanded + // without a further alloc, and this stores the allocated size. + int size_allocated_; }; // A generic class to store a banded triangular matrix with entries of type T. diff --git a/ccstruct/mod128.cpp b/ccstruct/mod128.cpp index 17776a2783..4e5f4bd325 100644 --- a/ccstruct/mod128.cpp +++ b/ccstruct/mod128.cpp @@ -86,16 +86,3 @@ DIR128::DIR128( //from fcoord while (high - low > 1); dir = low; } - - -/********************************************************************** - * dir_to_gradient - * - * Convert a direction to a vector. 
- **********************************************************************/ - -#if 0 // code is buggy for negative dir and unused -ICOORD DIR128::vector() const { //convert to vector - return dirtab[dir]; //easy really -} -#endif diff --git a/ccstruct/mod128.h b/ccstruct/mod128.h index 9b31d83a64..c0e71a423c 100644 --- a/ccstruct/mod128.h +++ b/ccstruct/mod128.h @@ -77,7 +77,6 @@ class DLLSYM DIR128 inT8 get_dir() const { //access function return dir; } - ICOORD vector() const; //turn to vector private: inT8 dir; //a direction diff --git a/ccstruct/pageres.cpp b/ccstruct/pageres.cpp index d7f0edaf80..32635365e4 100644 --- a/ccstruct/pageres.cpp +++ b/ccstruct/pageres.cpp @@ -892,7 +892,7 @@ void WERD_RES::FakeClassifyWord(int blob_count, BLOB_CHOICE** choices) { void WERD_RES::FakeWordFromRatings(PermuterType permuter) { int num_blobs = ratings->dimension(); WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs); - word_choice->set_permuter(TOP_CHOICE_PERM); + word_choice->set_permuter(permuter); for (int b = 0; b < num_blobs; ++b) { UNICHAR_ID unichar_id = UNICHAR_SPACE; float rating = MAX_INT32; @@ -1105,6 +1105,7 @@ void WERD_RES::InitNonPointers() { x_height = 0.0; caps_height = 0.0; baseline_shift = 0.0f; + space_certainty = 0.0f; guessed_x_ht = TRUE; guessed_caps_ht = TRUE; combination = FALSE; diff --git a/ccstruct/pageres.h b/ccstruct/pageres.h index 446bd7240d..33b9f4cb35 100644 --- a/ccstruct/pageres.h +++ b/ccstruct/pageres.h @@ -295,6 +295,9 @@ class WERD_RES : public ELIST_LINK { float x_height; // post match estimate float caps_height; // post match estimate float baseline_shift; // post match estimate. + // Certainty score for the spaces either side of this word (LSTM mode). + // MIN this value with the actual word certainty. 
+ float space_certainty; /* To deal with fuzzy spaces we need to be able to combine "words" to form From 0cbbbe632115b96ef642c9631538b04cb3fe3a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 30 Nov 2016 19:52:18 +0100 Subject: [PATCH 045/132] backport from 4.00: SkipDeSerialize and changes in ccutil --- ccmain/equationdetect.cpp | 1 + ccutil/ccutil.h | 4 +- ccutil/genericheap.h | 34 ++++++++++------- ccutil/genericvector.h | 79 +++++++++++++++++++++++++++++---------- ccutil/helpers.h | 2 +- ccutil/ocrclass.h | 1 - ccutil/platform.h | 2 + ccutil/serialis.cpp | 2 +- ccutil/strngs.cpp | 8 ++++ ccutil/strngs.h | 2 + 10 files changed, 97 insertions(+), 38 deletions(-) diff --git a/ccmain/equationdetect.cpp b/ccmain/equationdetect.cpp index 06aab24923..5fa955e432 100644 --- a/ccmain/equationdetect.cpp +++ b/ccmain/equationdetect.cpp @@ -20,6 +20,7 @@ #ifdef _MSC_VER #pragma warning(disable:4244) // Conversion warnings #include +#include #endif #ifdef __MINGW32__ diff --git a/ccutil/ccutil.h b/ccutil/ccutil.h index 974ef7d225..e4ec4f9ae8 100644 --- a/ccutil/ccutil.h +++ b/ccutil/ccutil.h @@ -26,9 +26,7 @@ #include "params.h" #include "unicharset.h" -#ifdef _WIN32 -#include -#else +#ifndef _WIN32 #include #include #endif diff --git a/ccutil/genericheap.h b/ccutil/genericheap.h index bb5f8ddc79..ccf273b33a 100644 --- a/ccutil/genericheap.h +++ b/ccutil/genericheap.h @@ -108,6 +108,8 @@ class GenericHeap { const Pair& PeekTop() const { return heap_[0]; } + // Get the value of the worst (largest, defined by operator< ) element. + const Pair& PeekWorst() const { return heap_[IndexOfWorst()]; } // Removes the top element of the heap. If entry is not NULL, the element // is copied into *entry, otherwise it is discarded. @@ -136,22 +138,12 @@ class GenericHeap { // not NULL, the element is copied into *entry, otherwise it is discarded. // Time = O(n). Returns false if the heap was already empty. 
bool PopWorst(Pair* entry) { - int heap_size = heap_.size(); - if (heap_size == 0) return false; // It cannot be empty! - - // Find the maximum element. Its index is guaranteed to be greater than - // the index of the parent of the last element, since by the heap invariant - // the parent must be less than or equal to the children. - int worst_index = heap_size - 1; - int end_parent = ParentNode(worst_index); - for (int i = worst_index - 1; i > end_parent; --i) { - if (heap_[worst_index] < heap_[i]) - worst_index = i; - } + int worst_index = IndexOfWorst(); + if (worst_index < 0) return false; // It cannot be empty! // Extract the worst element from the heap, leaving a hole at worst_index. if (entry != NULL) *entry = heap_[worst_index]; - --heap_size; + int heap_size = heap_.size() - 1; if (heap_size > 0) { // Sift the hole upwards to match the last element of the heap_ Pair hole_pair = heap_[heap_size]; @@ -162,6 +154,22 @@ class GenericHeap { return true; } + // Returns the index of the worst element. Time = O(n/2). + int IndexOfWorst() const { + int heap_size = heap_.size(); + if (heap_size == 0) return -1; // It cannot be empty! + + // Find the maximum element. Its index is guaranteed to be greater than + // the index of the parent of the last element, since by the heap invariant + // the parent must be less than or equal to the children. + int worst_index = heap_size - 1; + int end_parent = ParentNode(worst_index); + for (int i = worst_index - 1; i > end_parent; --i) { + if (heap_[worst_index] < heap_[i]) worst_index = i; + } + return worst_index; + } + // The pointed-to Pair has changed its key value, so the location of pair // is reshuffled to maintain the heap invariant. // Must be a valid pointer to an element of the heap_! 
diff --git a/ccutil/genericvector.h b/ccutil/genericvector.h index 6d8187b09d..3a70e21ce0 100644 --- a/ccutil/genericvector.h +++ b/ccutil/genericvector.h @@ -174,6 +174,8 @@ class GenericVector { // If swap is true, assumes a big/little-endian swap is needed. bool DeSerialize(bool swap, FILE* fp); bool DeSerialize(bool swap, tesseract::TFile* fp); + // Skips the deserialization of the vector. + static bool SkipDeSerialize(bool swap, tesseract::TFile* fp); // Writes a vector of classes to the given file. Assumes the existence of // bool T::Serialize(FILE* fp) const that returns false in case of error. // Returns false in case of error. @@ -186,6 +188,8 @@ class GenericVector { // If swap is true, assumes a big/little-endian swap is needed. bool DeSerializeClasses(bool swap, FILE* fp); bool DeSerializeClasses(bool swap, tesseract::TFile* fp); + // Calls SkipDeSerialize on the elements of the vector. + static bool SkipDeSerializeClasses(bool swap, tesseract::TFile* fp); // Allocates a new array of double the current_size, copies over the // information from data to the new location, deletes data and returns @@ -360,7 +364,7 @@ inline bool LoadDataFromFile(const STRING& filename, size_t size = ftell(fp); fseek(fp, 0, SEEK_SET); // Pad with a 0, just in case we treat the result as a string. 
- data->init_to_size((int)size + 1, 0); + data->init_to_size(static_cast(size) + 1, 0); bool result = fread(&(*data)[0], 1, size, fp) == size; fclose(fp); return result; @@ -555,34 +559,54 @@ class PointerVector : public GenericVector { } bool DeSerialize(bool swap, TFile* fp) { inT32 reserved; - if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false; - if (swap) Reverse32(&reserved); + if (!DeSerializeSize(swap, fp, &reserved)) return false; GenericVector::reserve(reserved); truncate(0); for (int i = 0; i < reserved; ++i) { - inT8 non_null; - if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false; - T* item = NULL; - if (non_null) { - item = new T; - if (!item->DeSerialize(swap, fp)) { - delete item; - return false; - } - this->push_back(item); - } else { - // Null elements should keep their place in the vector. - this->push_back(NULL); + if (!DeSerializeElement(swap, fp)) return false; + } + return true; + } + // Enables deserialization of a selection of elements. Note that in order to + // retain the integrity of the stream, the caller must call some combination + // of DeSerializeElement and DeSerializeSkip of the exact number returned in + // *size, assuming a true return. + static bool DeSerializeSize(bool swap, TFile* fp, inT32* size) { + if (fp->FRead(size, sizeof(*size), 1) != 1) return false; + if (swap) Reverse32(size); + return true; + } + // Reads and appends to the vector the next element of the serialization. + bool DeSerializeElement(bool swap, TFile* fp) { + inT8 non_null; + if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false; + T* item = NULL; + if (non_null) { + item = new T; + if (!item->DeSerialize(swap, fp)) { + delete item; + return false; } + this->push_back(item); + } else { + // Null elements should keep their place in the vector. + this->push_back(NULL); + } + return true; + } + // Skips the next element of the serialization. 
+ static bool DeSerializeSkip(bool swap, TFile* fp) { + inT8 non_null; + if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false; + if (non_null) { + if (!T::SkipDeSerialize(swap, fp)) return false; } return true; } // Sorts the items pointed to by the members of this vector using // t::operator<(). - void sort() { - sort(&sort_ptr_cmp); - } + void sort() { this->GenericVector::sort(&sort_ptr_cmp); } }; } // namespace tesseract @@ -925,6 +949,13 @@ bool GenericVector::DeSerialize(bool swap, tesseract::TFile* fp) { } return true; } +template +bool GenericVector::SkipDeSerialize(bool swap, tesseract::TFile* fp) { + inT32 reserved; + if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false; + if (swap) Reverse32(&reserved); + return fp->FRead(NULL, sizeof(T), reserved) == reserved; +} // Writes a vector of classes to the given file. Assumes the existence of // bool T::Serialize(FILE* fp) const that returns false in case of error. @@ -975,6 +1006,16 @@ bool GenericVector::DeSerializeClasses(bool swap, tesseract::TFile* fp) { } return true; } +template +bool GenericVector::SkipDeSerializeClasses(bool swap, tesseract::TFile* fp) { + uinT32 reserved; + if (fp->FRead(&reserved, sizeof(reserved), 1) != 1) return false; + if (swap) Reverse32(&reserved); + for (int i = 0; i < reserved; ++i) { + if (!T::SkipDeSerialize(swap, fp)) return false; + } + return true; +} // This method clear the current object, then, does a shallow copy of // its argument, and finally invalidates its argument. diff --git a/ccutil/helpers.h b/ccutil/helpers.h index 51dd3b0b44..a2276bc451 100644 --- a/ccutil/helpers.h +++ b/ccutil/helpers.h @@ -73,7 +73,7 @@ class TRand { // Remove newline (if any) at the end of the string. 
inline void chomp_string(char *str) { - int last_index = (int)strlen(str) - 1; + int last_index = static_cast(strlen(str)) - 1; while (last_index >= 0 && (str[last_index] == '\n' || str[last_index] == '\r')) { str[last_index--] = '\0'; diff --git a/ccutil/ocrclass.h b/ccutil/ocrclass.h index f352956fa0..cb83c6d6b6 100644 --- a/ccutil/ocrclass.h +++ b/ccutil/ocrclass.h @@ -29,7 +29,6 @@ #ifndef __GNUC__ #ifdef _WIN32 -#include #include "gettimeofday.h" #endif #else diff --git a/ccutil/platform.h b/ccutil/platform.h index 219f9e31e0..d60a45b601 100644 --- a/ccutil/platform.h +++ b/ccutil/platform.h @@ -24,6 +24,8 @@ #define DLLSYM #ifdef _WIN32 +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN #ifdef __GNUC__ #define ultoa _ultoa #endif /* __GNUC__ */ diff --git a/ccutil/serialis.cpp b/ccutil/serialis.cpp index ff3b278a7e..d1eed58465 100644 --- a/ccutil/serialis.cpp +++ b/ccutil/serialis.cpp @@ -95,7 +95,7 @@ int TFile::FRead(void* buffer, int size, int count) { char* char_buffer = reinterpret_cast(buffer); if (data_->size() - offset_ < required_size) required_size = data_->size() - offset_; - if (required_size > 0) + if (required_size > 0 && char_buffer != NULL) memcpy(char_buffer, &(*data_)[offset_], required_size); offset_ += required_size; return required_size / size; diff --git a/ccutil/strngs.cpp b/ccutil/strngs.cpp index 0760852e90..5a9cfd0d48 100644 --- a/ccutil/strngs.cpp +++ b/ccutil/strngs.cpp @@ -181,6 +181,14 @@ bool STRING::DeSerialize(bool swap, TFile* fp) { return true; } +// As DeSerialize, but only seeks past the data - hence a static method. 
+bool STRING::SkipDeSerialize(bool swap, tesseract::TFile* fp) { + inT32 len; + if (fp->FRead(&len, sizeof(len), 1) != 1) return false; + if (swap) ReverseN(&len, sizeof(len)); + return fp->FRead(NULL, 1, len) == len; +} + BOOL8 STRING::contains(const char c) const { return (c != '\0') && (strchr (GetCStr(), c) != NULL); } diff --git a/ccutil/strngs.h b/ccutil/strngs.h index 9308cc67c8..1fe42b6076 100644 --- a/ccutil/strngs.h +++ b/ccutil/strngs.h @@ -60,6 +60,8 @@ class TESS_API STRING // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool DeSerialize(bool swap, tesseract::TFile* fp); + // As DeSerialize, but only seeks past the data - hence a static method. + static bool SkipDeSerialize(bool swap, tesseract::TFile* fp); BOOL8 contains(const char c) const; inT32 length() const; From 5750e728d9760c73259894e827db9d8e7bf776f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 30 Nov 2016 21:58:57 +0100 Subject: [PATCH 046/132] use TessHashMap instead of hash_map, unordered_map --- training/ligature_table.h | 2 +- training/pango_font_info.cpp | 6 +++--- training/pango_font_info.h | 5 +++-- training/stringrenderer.h | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/training/ligature_table.h b/training/ligature_table.h index ecae7943dd..83e7dc3c4d 100644 --- a/training/ligature_table.h +++ b/training/ligature_table.h @@ -32,7 +32,7 @@ namespace tesseract { class PangoFontInfo; // defined in pango_font_info.h // Map to substitute strings for ligatures. 
-typedef hash_map LigHash; +typedef TessHashMap LigHash; class LigatureTable { public: diff --git a/training/pango_font_info.cpp b/training/pango_font_info.cpp index c2b508c02d..6ca8c8998f 100644 --- a/training/pango_font_info.cpp +++ b/training/pango_font_info.cpp @@ -686,7 +686,7 @@ void FontUtils::GetAllRenderableCharacters(const vector& fonts, // Utilities written to be backward compatible with StringRender /* static */ -int FontUtils::FontScore(const unordered_map& ch_map, +int FontUtils::FontScore(const TessHashMap& ch_map, const string& fontname, int* raw_score, vector* ch_flags) { @@ -703,7 +703,7 @@ int FontUtils::FontScore(const unordered_map& ch_map, } *raw_score = 0; int ok_chars = 0; - for (unordered_map::const_iterator it = ch_map.begin(); + for (TessHashMap::const_iterator it = ch_map.begin(); it != ch_map.end(); ++it) { bool covered = (IsWhitespace(it->first) || (pango_coverage_get(coverage, it->first) @@ -721,7 +721,7 @@ int FontUtils::FontScore(const unordered_map& ch_map, /* static */ -string FontUtils::BestFonts(const unordered_map& ch_map, +string FontUtils::BestFonts(const TessHashMap& ch_map, vector > >* fonts) { const double kMinOKFraction = 0.99; // Weighted fraction of characters that must be renderable in a font to make diff --git a/training/pango_font_info.h b/training/pango_font_info.h index f07d712f11..fc46fcf48b 100644 --- a/training/pango_font_info.h +++ b/training/pango_font_info.h @@ -185,7 +185,8 @@ class FontUtils { // In the flags vector, each flag is set according to whether the // corresponding character (in order of iterating ch_map) can be rendered. // The return string is a list of the acceptable fonts that were used. - static string BestFonts(const unordered_map& ch_map, + static string BestFonts( + const TessHashMap& ch_map, vector > >* font_flag); // FontScore returns the weighted renderability score of the given @@ -193,7 +194,7 @@ class FontUtils { // is also returned in raw_score. 
// The values in the bool vector ch_flags correspond to whether the // corresponding character (in order of iterating ch_map) can be rendered. - static int FontScore(const unordered_map& ch_map, + static int FontScore(const TessHashMap& ch_map, const string& fontname, int* raw_score, vector* ch_flags); diff --git a/training/stringrenderer.h b/training/stringrenderer.h index 9b72cf6e3a..f0ba0c0b00 100644 --- a/training/stringrenderer.h +++ b/training/stringrenderer.h @@ -210,7 +210,7 @@ class StringRenderer { Boxa* page_boxes_; // Objects cached for subsequent calls to RenderAllFontsToImage() - hash_map char_map_; // Time-saving char histogram. + TessHashMap char_map_; // Time-saving char histogram. int total_chars_; // Number in the string to be rendered. int font_index_; // Index of next font to use in font list. int last_offset_; // Offset returned from last successful rendering From 775a108dc78e094923119af6d842193bda1eef6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 2 Dec 2016 19:50:54 +0100 Subject: [PATCH 047/132] backport from 4.00: enable selection of OCR engine mode from command line --- api/tesseractmain.cpp | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index a6f766eed1..0ac3b1f0c9 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -36,7 +36,6 @@ #if defined(HAVE_TIFFIO_H) && defined(_WIN32) #include -#include static void Win32WarningHandler(const char* module, const char* fmt, va_list ap) { @@ -116,6 +115,18 @@ void PrintHelpForPSM() { " 12 Sparse text with OSD.\n" " 13 Raw line. 
Treat the image as a single text line,\n" "\t\t\tbypassing hacks that are Tesseract-specific.\n"; + + printf("%s", msg); +} + +void PrintHelpForOEM() { + const char* msg = + "OCR Engine modes:\n" + " 0 Original Tesseract only.\n" + " 1 Cube only.\n" + " 2 Tesseract + cube.\n" + " 3 Default, based on what is available.\n"; + printf("%s", msg); } @@ -131,15 +142,18 @@ void PrintHelpMessage(const char* program) { " -c VAR=VALUE Set value for config variables.\n" " Multiple -c arguments are allowed.\n" " -psm NUM Specify page segmentation mode.\n" + " -oem NUM Specify OCR Engine mode.\n" "NOTE: These options must occur before any configfile.\n"; printf("\n%s\n", ocr_options); PrintHelpForPSM(); + PrintHelpForOEM(); const char* single_options = "Single options:\n" " -h, --help Show this help message.\n" " --help-psm Show page segmentation modes.\n" + " --help-oem Show OCR Engine modes.\n" " -v, --version Show version information.\n" " --list-langs List available languages for tesseract engine.\n" " --print-parameters Print tesseract parameters to stdout.\n"; @@ -213,7 +227,8 @@ void ParseArgs(const int argc, char** argv, const char** lang, const char** datapath, bool* list_langs, bool* print_parameters, GenericVector* vars_vec, GenericVector* vars_values, int* arg_i, - tesseract::PageSegMode* pagesegmode) { + tesseract::PageSegMode* pagesegmode, + tesseract::OcrEngineMode* enginemode) { if (argc == 1) { PrintHelpMessage(argv[0]); exit(0); @@ -228,6 +243,10 @@ void ParseArgs(const int argc, char** argv, const char** lang, PrintHelpForPSM(); exit(0); } + if ((strcmp(argv[1], "--help-oem") == 0)) { + PrintHelpForOEM(); + exit(0); + } if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) { PrintVersionInfo(); exit(0); @@ -257,6 +276,9 @@ void ParseArgs(const int argc, char** argv, const char** lang, } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) { *pagesegmode = static_cast(atoi(argv[i + 1])); ++i; + } else if (strcmp(argv[i], "-oem") == 0 && i 
+ 1 < argc) { + *enginemode = static_cast(atoi(argv[i + 1])); + ++i; } else if (strcmp(argv[i], "--print-parameters") == 0) { noocr = true; *print_parameters = true; @@ -354,6 +376,7 @@ int main(int argc, char** argv) { bool print_parameters = false; int arg_i = 1; tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; + tesseract::OcrEngineMode enginemode = tesseract::OEM_DEFAULT; /* main() calls functions like ParseArgs which call exit(). * This results in memory leaks if vars_vec and vars_values are * declared as auto variables (destructor is not called then). */ @@ -366,7 +389,8 @@ int main(int argc, char** argv) { #endif /* HAVE_TIFFIO_H && _WIN32 */ ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs, - &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode); + &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode, + &enginemode); bool banner = false; if (outputbase != NULL && strcmp(outputbase, "-") && @@ -379,8 +403,8 @@ int main(int argc, char** argv) { api.SetOutputName(outputbase); - int init_failed = api.Init(datapath, lang, tesseract::OEM_DEFAULT, - &(argv[arg_i]), argc - arg_i, &vars_vec, &vars_values, false); + int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]), + argc - arg_i, &vars_vec, &vars_values, false); if (init_failed) { fprintf(stderr, "Could not initialize tesseract.\n"); exit(1); From 2f8c1e7b52e3b4570a498a5ec3846192c445159d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 2 Dec 2016 20:03:41 +0100 Subject: [PATCH 048/132] backport from 4.00: split Dict::Load to SetupForLoad, Load and FinishLoad --- ccmain/tessedit.cpp | 4 +++- dict/dict.cpp | 47 +++++++++++++++++++++++++++++++++------------ dict/dict.h | 22 ++++++++++++++------- wordrec/tface.cpp | 6 +++++- 4 files changed, 58 insertions(+), 21 deletions(-) diff --git a/ccmain/tessedit.cpp b/ccmain/tessedit.cpp index 8c1fb80837..b2fa02196a 100644 --- a/ccmain/tessedit.cpp +++ b/ccmain/tessedit.cpp 
@@ -468,7 +468,9 @@ int Tesseract::init_tesseract_lm(const char *arg0, if (!init_tesseract_lang_data(arg0, textbase, language, OEM_TESSERACT_ONLY, NULL, 0, NULL, NULL, false)) return -1; - getDict().Load(Dict::GlobalDawgCache()); + getDict().SetupForLoad(Dict::GlobalDawgCache()); + getDict().Load(tessdata_manager.GetDataFileName().string(), lang); + getDict().FinishLoad(); tessdata_manager.End(); return 0; } diff --git a/dict/dict.cpp b/dict/dict.cpp index 0ae4756a43..9d9eaf12a3 100644 --- a/dict/dict.cpp +++ b/dict/dict.cpp @@ -190,6 +190,7 @@ Dict::Dict(CCUtil *ccutil) } Dict::~Dict() { + End(); if (hyphen_word_ != NULL) delete hyphen_word_; if (output_ambig_words_file_ != NULL) fclose(output_ambig_words_file_); } @@ -201,10 +202,8 @@ DawgCache *Dict::GlobalDawgCache() { return &cache; } -void Dict::Load(DawgCache *dawg_cache) { - STRING name; - STRING &lang = getCCUtil()->lang; - +// Sets up ready for a Load. +void Dict::SetupForLoad(DawgCache *dawg_cache) { if (dawgs_.length() != 0) this->End(); apostrophe_unichar_id_ = getUnicharset().unichar_to_id(kApostropheSymbol); @@ -219,10 +218,10 @@ void Dict::Load(DawgCache *dawg_cache) { dawg_cache_ = new DawgCache(); dawg_cache_is_ours_ = true; } +} - TessdataManager &tessdata_manager = getCCUtil()->tessdata_manager; - const char *data_file_name = tessdata_manager.GetDataFileName().string(); - +// Loads the dawgs needed by Tesseract. Call FinishLoad() after. +void Dict::Load(const char *data_file_name, const STRING &lang) { // Load dawgs_. if (load_punc_dawg) { punc_dawg_ = dawg_cache_->GetSquishedDawg( @@ -254,6 +253,7 @@ void Dict::Load(DawgCache *dawg_cache) { if (unambig_dawg_) dawgs_ += unambig_dawg_; } + STRING name; if (((STRING &)user_words_suffix).length() > 0 || ((STRING &)user_words_file).length() > 0) { Trie *trie_ptr = new Trie(DAWG_TYPE_WORD, lang, USER_DAWG_PERM, @@ -299,8 +299,13 @@ void Dict::Load(DawgCache *dawg_cache) { // This dawg is temporary and should not be searched by letter_is_ok. 
pending_words_ = new Trie(DAWG_TYPE_WORD, lang, NO_PERM, getUnicharset().size(), dawg_debug_level); +} - // Construct a list of corresponding successors for each dawg. Each entry i +// Completes the loading process after Load(). +// Returns false if no dictionaries were loaded. +bool Dict::FinishLoad() { + if (dawgs_.empty()) return false; + // Construct a list of corresponding successors for each dawg. Each entry, i, // in the successors_ vector is a vector of integers that represent the // indices into the dawgs_ vector of the successors for dawg i. successors_.reserve(dawgs_.length()); @@ -315,6 +320,7 @@ void Dict::Load(DawgCache *dawg_cache) { } successors_ += lst; } + return true; } void Dict::End() { @@ -367,6 +373,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args, // Initialization. PermuterType curr_perm = NO_PERM; dawg_args->updated_dawgs->clear(); + dawg_args->valid_end = false; // Go over the active_dawgs vector and insert DawgPosition records // with the updated ref (an edge with the corresponding unichar id) into @@ -404,6 +411,9 @@ int Dict::def_letter_is_okay(void* void_dawg_args, dawg_debug_level > 0, "Append transition from punc dawg to current dawgs: "); if (sdawg->permuter() > curr_perm) curr_perm = sdawg->permuter(); + if (sdawg->end_of_word(dawg_edge) && + punc_dawg->end_of_word(punc_transition_edge)) + dawg_args->valid_end = true; } } } @@ -418,6 +428,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args, dawg_debug_level > 0, "Extend punctuation dawg: "); if (PUNC_PERM > curr_perm) curr_perm = PUNC_PERM; + if (punc_dawg->end_of_word(punc_edge)) dawg_args->valid_end = true; } continue; } @@ -435,6 +446,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args, dawg_debug_level > 0, "Return to punctuation dawg: "); if (dawg->permuter() > curr_perm) curr_perm = dawg->permuter(); + if (punc_dawg->end_of_word(punc_edge)) dawg_args->valid_end = true; } } @@ -444,8 +456,8 @@ int Dict::def_letter_is_okay(void* void_dawg_args, // possible edges, 
not only for the exact unichar_id, but also // for all its character classes (alpha, digit, etc). if (dawg->type() == DAWG_TYPE_PATTERN) { - ProcessPatternEdges(dawg, pos, unichar_id, word_end, - dawg_args->updated_dawgs, &curr_perm); + ProcessPatternEdges(dawg, pos, unichar_id, word_end, dawg_args, + &curr_perm); // There can't be any successors to dawg that is of type // DAWG_TYPE_PATTERN, so we are done examining this DawgPosition. continue; @@ -472,6 +484,9 @@ int Dict::def_letter_is_okay(void* void_dawg_args, continue; } if (dawg->permuter() > curr_perm) curr_perm = dawg->permuter(); + if (dawg->end_of_word(edge) && + (punc_dawg == NULL || punc_dawg->end_of_word(pos.punc_ref))) + dawg_args->valid_end = true; dawg_args->updated_dawgs->add_unique( DawgPosition(pos.dawg_index, edge, pos.punc_index, pos.punc_ref, false), @@ -496,7 +511,7 @@ int Dict::def_letter_is_okay(void* void_dawg_args, void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos, UNICHAR_ID unichar_id, bool word_end, - DawgPositionVector *updated_dawgs, + DawgArgs *dawg_args, PermuterType *curr_perm) const { NODE_REF node = GetStartingNode(dawg, pos.dawg_ref); // Try to find the edge corresponding to the exact unichar_id and to all the @@ -519,7 +534,8 @@ void Dict::ProcessPatternEdges(const Dawg *dawg, const DawgPosition &pos, tprintf("Letter found in pattern dawg %d\n", pos.dawg_index); } if (dawg->permuter() > *curr_perm) *curr_perm = dawg->permuter(); - updated_dawgs->add_unique( + if (dawg->end_of_word(edge)) dawg_args->valid_end = true; + dawg_args->updated_dawgs->add_unique( DawgPosition(pos.dawg_index, edge, pos.punc_index, pos.punc_ref, pos.back_to_punc), dawg_debug_level > 0, @@ -815,5 +831,12 @@ bool Dict::valid_punctuation(const WERD_CHOICE &word) { return false; } +/// Returns true if the language is space-delimited (not CJ, or T). 
+bool Dict::IsSpaceDelimitedLang() const { + const UNICHARSET &u_set = getUnicharset(); + if (u_set.han_sid() > 0) return false; + if (u_set.katakana_sid() > 0) return false; + return true; +} } // namespace tesseract diff --git a/dict/dict.h b/dict/dict.h index 326f1235d5..5ae203689f 100644 --- a/dict/dict.h +++ b/dict/dict.h @@ -23,7 +23,6 @@ #include "dawg.h" #include "dawg_cache.h" #include "host.h" -#include "oldlist.h" #include "ratngs.h" #include "stopper.h" #include "trie.h" @@ -76,11 +75,13 @@ enum XHeightConsistencyEnum {XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT}; struct DawgArgs { DawgArgs(DawgPositionVector *d, DawgPositionVector *up, PermuterType p) - : active_dawgs(d), updated_dawgs(up), permuter(p) {} + : active_dawgs(d), updated_dawgs(up), permuter(p), valid_end(false) {} DawgPositionVector *active_dawgs; DawgPositionVector *updated_dawgs; PermuterType permuter; + // True if the current position is a valid word end. + bool valid_end; }; class Dict { @@ -294,7 +295,13 @@ class Dict { /// Initialize Dict class - load dawgs from [lang].traineddata and /// user-specified wordlist and parttern list. static DawgCache *GlobalDawgCache(); - void Load(DawgCache *dawg_cache); + // Sets up ready for a Load. + void SetupForLoad(DawgCache *dawg_cache); + // Loads the dawgs needed by Tesseract. Call FinishLoad() after. + void Load(const char *data_file_name, const STRING &lang); + // Completes the loading process after Load(). + // Returns false if no dictionaries were loaded. + bool FinishLoad(); void End(); // Resets the document dictionary analogous to ResetAdaptiveClassifier. 
@@ -374,6 +381,7 @@ class Dict { double def_probability_in_context( const char* lang, const char* context, int context_bytes, const char* character, int character_bytes) { + (void) lang; (void) context; (void) context_bytes; (void) character; @@ -397,9 +405,7 @@ class Dict { } inline void SetWildcardID(UNICHAR_ID id) { wildcard_unichar_id_ = id; } - inline UNICHAR_ID WildcardID() const { - return wildcard_unichar_id_; - } + inline UNICHAR_ID WildcardID() const { return wildcard_unichar_id_; } /// Return the number of dawgs in the dawgs_ vector. inline int NumDawgs() const { return dawgs_.size(); } /// Return i-th dawg pointer recorded in the dawgs_ vector. @@ -436,7 +442,7 @@ class Dict { /// edges were found. void ProcessPatternEdges(const Dawg *dawg, const DawgPosition &info, UNICHAR_ID unichar_id, bool word_end, - DawgPositionVector *updated_dawgs, + DawgArgs *dawg_args, PermuterType *current_permuter) const; /// Read/Write/Access special purpose dawgs which contain words @@ -483,6 +489,8 @@ class Dict { inline void SetWordsegRatingAdjustFactor(float f) { wordseg_rating_adjust_factor_ = f; } + /// Returns true if the language is space-delimited (not CJ, or T). + bool IsSpaceDelimitedLang() const; private: /** Private member variables. 
*/ diff --git a/wordrec/tface.cpp b/wordrec/tface.cpp index e21fcb8829..b1fc1779fb 100644 --- a/wordrec/tface.cpp +++ b/wordrec/tface.cpp @@ -49,7 +49,11 @@ void Wordrec::program_editup(const char *textbase, if (textbase != NULL) imagefile = textbase; InitFeatureDefs(&feature_defs_); InitAdaptiveClassifier(init_classifier); - if (init_dict) getDict().Load(Dict::GlobalDawgCache()); + if (init_dict) { + getDict().SetupForLoad(Dict::GlobalDawgCache()); + getDict().Load(tessdata_manager.GetDataFileName().string(), lang); + getDict().FinishLoad(); + } pass2_ok_split = chop_ok_split; } From af1d856cd9cb32f09d6a4cf9838ab0f0f2beda30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 2 Dec 2016 20:27:29 +0100 Subject: [PATCH 049/132] backport from 4.00: code improvements --- ccmain/pagesegmain.cpp | 3 --- ccmain/tesseract_cube_combiner.cpp | 2 -- ccmain/tesseract_cube_combiner.h | 20 +++++--------------- dict/dawg.h | 9 ++++++++- 4 files changed, 13 insertions(+), 21 deletions(-) diff --git a/ccmain/pagesegmain.cpp b/ccmain/pagesegmain.cpp index f4401d64d9..d815365137 100644 --- a/ccmain/pagesegmain.cpp +++ b/ccmain/pagesegmain.cpp @@ -18,9 +18,6 @@ **********************************************************************/ #ifdef _WIN32 -#ifndef __GNUC__ -#include -#endif // __GNUC__ #ifndef unlink #include #endif diff --git a/ccmain/tesseract_cube_combiner.cpp b/ccmain/tesseract_cube_combiner.cpp index e17bd04c2a..2b91f5259d 100644 --- a/ccmain/tesseract_cube_combiner.cpp +++ b/ccmain/tesseract_cube_combiner.cpp @@ -21,8 +21,6 @@ // the recognition results of Tesseract and Cube at the word level #include -#include -#include #include #include "tesseract_cube_combiner.h" diff --git a/ccmain/tesseract_cube_combiner.h b/ccmain/tesseract_cube_combiner.h index 49a0e2f4b0..9a6eb35302 100644 --- a/ccmain/tesseract_cube_combiner.h +++ b/ccmain/tesseract_cube_combiner.h @@ -27,16 +27,6 @@ #include #include "pageres.h" -#ifdef _WIN32 -#include -using namespace 
std; -#endif - -#ifdef USE_STD_NAMESPACE -using std::string; -using std::vector; -#endif - namespace tesseract { class CubeObject; @@ -72,15 +62,15 @@ class TesseractCubeCombiner { // output parameter will be true if both answers are identical, // false otherwise. Modifies the cube_alt_list, so no assumptions // should be made about its state upon return. - bool ComputeCombinerFeatures(const string &tess_res, + bool ComputeCombinerFeatures(const std::string &tess_res, int tess_confidence, CubeObject *cube_obj, WordAltList *cube_alt_list, - vector *features, + std::vector *features, bool *agreement); // Is the word valid according to Tesseract's language model - bool ValidWord(const string &str); + bool ValidWord(const std::string &str); // Loads the combiner neural network from file, using cube_cntxt_ // to find path. @@ -88,11 +78,11 @@ class TesseractCubeCombiner { private: // Normalize a UTF-8 string. Converts the UTF-8 string to UTF32 and optionally // strips punc and/or normalizes case and then converts back - string NormalizeString(const string &str, bool remove_punc, bool norm_case); + std::string NormalizeString(const std::string &str, bool remove_punc, bool norm_case); // Compares 2 strings after optionally normalizing them and or stripping // punctuation - int CompareStrings(const string &str1, const string &str2, bool ignore_punc, + int CompareStrings(const std::string &str1, const std::string &str2, bool ignore_punc, bool norm_case); NeuralNet *combiner_net_; // pointer to the combiner NeuralNet object diff --git a/dict/dawg.h b/dict/dawg.h index b37e771503..f040353118 100644 --- a/dict/dawg.h +++ b/dict/dawg.h @@ -183,13 +183,20 @@ class Dawg { /// of the given unichar_id. 
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, - GenericVector *vec) const {}; + GenericVector *vec) const { + (void)unichar_id; + (void)unicharset; + (void)vec; + } /// Returns the given EDGE_REF if the EDGE_RECORD that it points to has /// a self loop and the given unichar_id matches the unichar_id stored in the /// EDGE_RECORD, returns NO_EDGE otherwise. virtual EDGE_REF pattern_loop_edge( EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const { + (void)edge_ref; + (void)unichar_id; + (void)word_end; return false; } From fc3d07b44f90925d44b8679cefe37db22a176c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 2 Dec 2016 21:01:17 +0100 Subject: [PATCH 050/132] backport from 4.00: api changes --- api/baseapi.cpp | 65 +++++++++++++++------------------------ api/baseapi.h | 22 +++++++------ api/renderer.h | 4 +-- ccmain/tesseractclass.cpp | 2 ++ ccmain/tesseractclass.h | 28 ++++++++++------- 5 files changed, 57 insertions(+), 64 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index b57bcd51b8..55ee8e3df1 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -34,8 +34,6 @@ // workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME #undef __STRICT_ANSI__ #endif // _MSC_VER -#include -#include #include #include #else @@ -121,7 +119,6 @@ TessBaseAPI::TessBaseAPI() block_list_(NULL), page_res_(NULL), input_file_(NULL), - input_image_(NULL), output_file_(NULL), datapath_(NULL), language_(NULL), @@ -130,6 +127,7 @@ TessBaseAPI::TessBaseAPI() truth_cb_(NULL), rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0), image_width_(0), image_height_(0) { + unknown_title_ = ""; } TessBaseAPI::~TessBaseAPI() { @@ -515,9 +513,7 @@ void TessBaseAPI::ClearAdaptiveClassifier() { /** * Provide an image for Tesseract to recognize. Format is as - * TesseractRect above. Does not copy the image buffer, or take - * ownership. 
The source image may be destroyed after Recognize is called, - * either explicitly or implicitly via one of the Get*Text functions. + * TesseractRect above. Copies the image buffer and converts to Pix. * SetImage clears all recognition results, and sets the rectangle to the * full image, so it may be followed immediately by a GetUTF8Text, and it * will automatically perform recognition. @@ -525,9 +521,11 @@ void TessBaseAPI::ClearAdaptiveClassifier() { void TessBaseAPI::SetImage(const unsigned char* imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line) { - if (InternalSetImage()) + if (InternalSetImage()) { thresholder_->SetImage(imagedata, width, height, bytes_per_pixel, bytes_per_line); + SetInputImage(thresholder_->GetPixRect()); + } } void TessBaseAPI::SetSourceResolution(int ppi) { @@ -539,18 +537,17 @@ void TessBaseAPI::SetSourceResolution(int ppi) { /** * Provide an image for Tesseract to recognize. As with SetImage above, - * Tesseract doesn't take a copy or ownership or pixDestroy the image, so - * it must persist until after Recognize. + * Tesseract takes its own copy of the image, so it need not persist until + * after Recognize. * Pix vs raw, which to use? - * Use Pix where possible. A future version of Tesseract may choose to use Pix - * as its internal representation and discard IMAGE altogether. - * Because of that, an implementation that sources and targets Pix may end up - * with less copies than an implementation that does not. + * Use Pix where possible. Tesseract uses Pix as its internal representation + * and it is therefore more efficient to provide a Pix directly. 
*/ void TessBaseAPI::SetImage(Pix* pix) { - if (InternalSetImage()) + if (InternalSetImage()) { thresholder_->SetImage(pix); - SetInputImage(pix); + SetInputImage(thresholder_->GetPixRect()); + } } /** @@ -693,8 +690,8 @@ Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, if (pixa != NULL) { Pix* pix = NULL; if (raw_image) { - pix = page_it->GetImage(level, raw_padding, input_image_, - &left, &top); + pix = page_it->GetImage(level, raw_padding, GetInputImage(), &left, + &top); } else { pix = page_it->GetBinaryImage(level); } @@ -938,15 +935,10 @@ int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { return 0; } -void TessBaseAPI::SetInputImage(Pix *pix) { - if (input_image_) - pixDestroy(&input_image_); - input_image_ = NULL; - if (pix) - input_image_ = pixCopy(NULL, pix); -} +// Takes ownership of the input pix. +void TessBaseAPI::SetInputImage(Pix* pix) { tesseract_->set_pix_original(pix); } -Pix* TessBaseAPI::GetInputImage() { return input_image_; } +Pix* TessBaseAPI::GetInputImage() { return tesseract_->pix_original(); } const char * TessBaseAPI::GetInputName() { if (input_file_) @@ -990,8 +982,7 @@ bool TessBaseAPI::ProcessPagesFileList(FILE *flist, } // Begin producing output - const char* kUnknownTitle = ""; - if (renderer && !renderer->BeginDocument(kUnknownTitle)) { + if (renderer && !renderer->BeginDocument(unknown_title_)) { return false; } @@ -1103,7 +1094,6 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename, const char* retry_config, int timeout_millisec, TessResultRenderer* renderer) { -#ifndef ANDROID_BUILD PERF_COUNT_START("ProcessPages") bool stdInput = !strcmp(filename, "stdin") || !strcmp(filename, "-"); if (stdInput) { @@ -1160,8 +1150,7 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename, } // Begin the output - const char* kUnknownTitle = ""; - if (renderer && !renderer->BeginDocument(kUnknownTitle)) { + if (renderer && !renderer->BeginDocument(unknown_title_)) { pixDestroy(&pix); return false; } @@ 
-1183,9 +1172,6 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename, } PERF_COUNT_END return true; -#else - return false; -#endif } bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, @@ -1756,7 +1742,7 @@ char* TessBaseAPI::GetBoxText(int page_number) { int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + kMaxBytesPerLine; char* result = new char[total_length]; - strcpy(result, "\0"); + result[0] = '\0'; int output_length = 0; LTRResultIterator* it = GetLTRIterator(); do { @@ -1929,8 +1915,9 @@ char* TessBaseAPI::GetOsdText(int page_number) { // clockwise rotation needed to make the page upright int rotate = OrientationIdToValue(orient_id); - char* osd_buf = new char[255]; - snprintf(osd_buf, 255, + const int kOsdBufsize = 255; + char* osd_buf = new char[kOsdBufsize]; + snprintf(osd_buf, kOsdBufsize, "Page number: %d\n" "Orientation in degrees: %d\n" "Rotate: %d\n" @@ -2058,7 +2045,7 @@ void TessBaseAPI::Clear() { if (thresholder_ != NULL) thresholder_->Clear(); ClearResults(); - SetInputImage(NULL); + if (tesseract_ != NULL) SetInputImage(NULL); } /** @@ -2104,10 +2091,6 @@ void TessBaseAPI::End() { delete input_file_; input_file_ = NULL; } - if (input_image_ != NULL) { - pixDestroy(&input_image_); - input_image_ = NULL; - } if (output_file_ != NULL) { delete output_file_; output_file_ = NULL; diff --git a/api/baseapi.h b/api/baseapi.h index d872689eec..ffe170b531 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -142,6 +142,7 @@ class TESS_API TessBaseAPI { * is stored in the PDF so we need that as well. */ const char* GetInputName(); + // Takes ownership of the input pix. void SetInputImage(Pix *pix); Pix* GetInputImage(); int GetSourceYResolution(); @@ -333,9 +334,7 @@ class TESS_API TessBaseAPI { /** * Provide an image for Tesseract to recognize. Format is as - * TesseractRect above. Does not copy the image buffer, or take - * ownership. 
The source image may be destroyed after Recognize is called, - * either explicitly or implicitly via one of the Get*Text functions. + * TesseractRect above. Copies the image buffer and converts to Pix. * SetImage clears all recognition results, and sets the rectangle to the * full image, so it may be followed immediately by a GetUTF8Text, and it * will automatically perform recognition. @@ -345,13 +344,11 @@ class TESS_API TessBaseAPI { /** * Provide an image for Tesseract to recognize. As with SetImage above, - * Tesseract doesn't take a copy or ownership or pixDestroy the image, so - * it must persist until after Recognize. + * Tesseract takes its own copy of the image, so it need not persist until + * after Recognize. * Pix vs raw, which to use? - * Use Pix where possible. A future version of Tesseract may choose to use Pix - * as its internal representation and discard IMAGE altogether. - * Because of that, an implementation that sources and targets Pix may end up - * with less copies than an implementation that does not. + * Use Pix where possible. Tesseract uses Pix as its internal representation + * and it is therefore more efficient to provide a Pix directly. */ void SetImage(Pix* pix); @@ -866,7 +863,6 @@ class TESS_API TessBaseAPI { BLOCK_LIST* block_list_; ///< The page layout. PAGE_RES* page_res_; ///< The page-level data. STRING* input_file_; ///< Name used by training code. - Pix* input_image_; ///< Image used for searchable PDF STRING* output_file_; ///< Name used by debug code. STRING* datapath_; ///< Current location of tessdata. STRING* language_; ///< Last initialized language. @@ -902,6 +898,12 @@ class TESS_API TessBaseAPI { int timeout_millisec, TessResultRenderer* renderer, int tessedit_page_number); + // There's currently no way to pass a document title from the + // Tesseract command line, and we have multiple places that choose + // to set the title to an empty string. 
Using a single named + // variable will hopefully reduce confusion if the situation changes + // in the future. + const char *unknown_title_; }; // class TessBaseAPI. /** Escape a char string - remove &<>"' with HTML codes. */ diff --git a/api/renderer.h b/api/renderer.h index ac64e4b452..d868f267fa 100644 --- a/api/renderer.h +++ b/api/renderer.h @@ -77,7 +77,7 @@ class TESS_API TessResultRenderer { bool EndDocument(); const char* file_extension() const { return file_extension_; } - const char* title() const { return title_; } + const char* title() const { return title_.c_str(); } /** * Returns the index of the last image given to AddImage @@ -126,7 +126,7 @@ class TESS_API TessResultRenderer { private: const char* file_extension_; // standard extension for generated output - const char* title_; // title of document being renderered + STRING title_; // title of document being renderered int imagenum_; // index of last image added FILE* fout_; // output file pointer diff --git a/ccmain/tesseractclass.cpp b/ccmain/tesseractclass.cpp index f0cc1bfffe..5cbf70c8c2 100644 --- a/ccmain/tesseractclass.cpp +++ b/ccmain/tesseractclass.cpp @@ -605,6 +605,7 @@ Tesseract::Tesseract() pix_binary_(NULL), cube_binary_(NULL), pix_grey_(NULL), + pix_original_(NULL), pix_thresholds_(NULL), source_resolution_(0), textord_(this), @@ -624,6 +625,7 @@ Tesseract::Tesseract() Tesseract::~Tesseract() { Clear(); + pixDestroy(&pix_original_); end_tesseract(); sub_langs_.delete_data_pointers(); #ifndef NO_CUBE_BUILD diff --git a/ccmain/tesseractclass.h b/ccmain/tesseractclass.h index 5bc0b102c6..e01625e354 100644 --- a/ccmain/tesseractclass.h +++ b/ccmain/tesseractclass.h @@ -38,7 +38,6 @@ class BLOB_CHOICE_LIST_CLIST; class BLOCK_LIST; -class CharSamp; struct OSResults; class PAGE_RES; class PAGE_RES_IT; @@ -98,6 +97,7 @@ namespace tesseract { class ColumnFinder; #ifndef NO_CUBE_BUILD +class CharSamp; class CubeLineObject; class CubeObject; class CubeRecoContext; @@ -189,7 +189,7 @@ class 
Tesseract : public Wordrec { } // Destroy any existing pix and return a pointer to the pointer. Pix** mutable_pix_binary() { - Clear(); + pixDestroy(&pix_binary_); return &pix_binary_; } Pix* pix_binary() const { @@ -202,16 +202,20 @@ class Tesseract : public Wordrec { pixDestroy(&pix_grey_); pix_grey_ = grey_pix; } - // Returns a pointer to a Pix representing the best available image of the - // page. The image will be 8-bit grey if the input was grey or color. Note - // that in grey 0 is black and 255 is white. If the input was binary, then - // the returned Pix will be binary. Note that here black is 1 and white is 0. - // To tell the difference pixGetDepth() will return 8 or 1. - // In either case, the return value is a borrowed Pix, and should not be - // deleted or pixDestroyed. - Pix* BestPix() const { - return pix_grey_ != NULL ? pix_grey_ : pix_binary_; + Pix* pix_original() const { return pix_original_; } + // Takes ownership of the given original_pix. + void set_pix_original(Pix* original_pix) { + pixDestroy(&pix_original_); + pix_original_ = original_pix; } + // Returns a pointer to a Pix representing the best available (original) image + // of the page. Can be of any bit depth, but never color-mapped, as that has + // always been dealt with. Note that in grey and color, 0 is black and 255 is + // white. If the input was binary, then black is 1 and white is 0. + // To tell the difference pixGetDepth() will return 32, 8 or 1. + // In any case, the return value is a borrowed Pix, and should not be + // deleted or pixDestroyed. + Pix* BestPix() const { return pix_original_; } void set_pix_thresholds(Pix* thresholds) { pixDestroy(&pix_thresholds_); pix_thresholds_ = thresholds; @@ -1174,6 +1178,8 @@ class Tesseract : public Wordrec { Pix* cube_binary_; // Grey-level input image if the input was not binary, otherwise NULL. Pix* pix_grey_; + // Original input image. Color if the input was color. 
+ Pix* pix_original_; // Thresholds that were used to generate the thresholded image from grey. Pix* pix_thresholds_; // Input image resolution after any scaling. The resolution is not well From 9ccda80dd6999ba7bbf8de3090f256eedce915ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 2 Dec 2016 21:07:28 +0100 Subject: [PATCH 051/132] backport from 4.00: training shell scripts --- training/language-specific.sh | 7 ++++++- training/tesstrain_utils.sh | 6 +++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/training/language-specific.sh b/training/language-specific.sh index 5f624c6437..b3c38cf876 100755 --- a/training/language-specific.sh +++ b/training/language-specific.sh @@ -878,6 +878,9 @@ set_lang_specific_parameters() { AMBIGS_FILTER_DENOMINATOR="100000" LEADING="32" MEAN_COUNT="40" # Default for latin script. + # Language to mix with the language for maximum accuracy. Defaults to eng. + # If no language is good, set to the base language. + MIX_LANG="eng" case ${lang} in # Latin languages. @@ -969,11 +972,13 @@ set_lang_specific_parameters() { WORD_DAWG_SIZE=1000000 test -z "$FONTS" && FONTS=( "${EARLY_LATIN_FONTS[@]}" );; - # Cyrillic script-based languages. + # Cyrillic script-based languages. It is bad to mix Latin with Cyrillic. 
rus ) test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" ) + MIX_LANG="rus" NUMBER_DAWG_FACTOR=0.05 WORD_DAWG_SIZE=1000000 ;; aze_cyrl | bel | bul | kaz | mkd | srp | tgk | ukr | uzb_cyrl ) + MIX_LANG="${lang}" test -z "$FONTS" && FONTS=( "${RUSSIAN_FONTS[@]}" ) ;; # Special code for performing Cyrillic language-id that is trained on diff --git a/training/tesstrain_utils.sh b/training/tesstrain_utils.sh index 48df3e4d59..906a20ac4f 100755 --- a/training/tesstrain_utils.sh +++ b/training/tesstrain_utils.sh @@ -90,8 +90,8 @@ parse_flags() { --) break;; --fontlist) - fn=0 - FONTS="" + fn=0 + FONTS="" while test $j -lt ${#ARGV[@]}; do test -z "${ARGV[$j]}" && break test `echo ${ARGV[$j]} | cut -c -2` = "--" && break @@ -199,7 +199,7 @@ generate_font_image() { local common_args="--fontconfig_tmpdir=${FONT_CONFIG_CACHE}" common_args+=" --fonts_dir=${FONTS_DIR} --strip_unrenderable_words" - common_args+=" --fontconfig_refresh_config_file=false --leading=${LEADING}" + common_args+=" --leading=${LEADING}" common_args+=" --char_spacing=${CHAR_SPACING} --exposure=${EXPOSURE}" common_args+=" --outputbase=${outbase}" From 53152e4221a67fdf2c548bc50eefa8f78e5f6f96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 2 Dec 2016 21:28:31 +0100 Subject: [PATCH 052/132] change VS2010 lib project name --- vs2010/libtesseract/libtesseract.vcxproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vs2010/libtesseract/libtesseract.vcxproj b/vs2010/libtesseract/libtesseract.vcxproj index 9b734d5d52..90edfffa90 100644 --- a/vs2010/libtesseract/libtesseract.vcxproj +++ b/vs2010/libtesseract/libtesseract.vcxproj @@ -27,7 +27,7 @@ - libtesseract304 + libtesseract305 {D14F28C7-0CAE-4C37-B174-40FDEFBD4FE0} libtesseract From 70993585106155af617ef93306539e854a4ce19b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Sun, 4 Dec 2016 22:06:52 +0100 Subject: [PATCH 053/132] backport from 4.00: training --- training/boxchar.cpp | 12 +- 
training/boxchar.h | 3 + training/cntraining.cpp | 42 ++++--- training/commontraining.cpp | 10 +- training/degradeimage.cpp | 163 +++++++++++++++++++++++++ training/degradeimage.h | 28 ++++- training/fileio.cpp | 2 +- training/mftraining.cpp | 6 +- training/normstrngs.cpp | 8 +- training/normstrngs.h | 9 +- training/pango_font_info.cpp | 129 +++++++++---------- training/pango_font_info.h | 31 +++-- training/stringrenderer.cpp | 20 +-- training/unicharset_training_utils.cpp | 7 +- training/unicharset_training_utils.h | 8 +- 15 files changed, 357 insertions(+), 121 deletions(-) diff --git a/training/boxchar.cpp b/training/boxchar.cpp index b99c12a600..4324597744 100644 --- a/training/boxchar.cpp +++ b/training/boxchar.cpp @@ -73,7 +73,6 @@ void BoxChar::PrepareToWrite(vector* boxes) { if (rtl_rules) { ReorderRTLText(boxes); } - tprintf("Rtl = %d ,vertical=%d\n", rtl_rules, vertical_rules); } // Inserts newline (tab) characters into the vector at newline positions. @@ -291,13 +290,19 @@ const int kMaxLineLength = 1024; /* static */ void BoxChar::WriteTesseractBoxFile(const string& filename, int height, const vector& boxes) { + string output = GetTesseractBoxStr(height, boxes); + File::WriteStringToFileOrDie(output, filename); +} + +/* static */ +string BoxChar::GetTesseractBoxStr(int height, const vector& boxes) { string output; char buffer[kMaxLineLength]; for (int i = 0; i < boxes.size(); ++i) { const Box* box = boxes[i]->box_; if (box == NULL) { tprintf("Error: Call PrepareToWrite before WriteTesseractBoxFile!!\n"); - return; + return ""; } int nbytes = snprintf(buffer, kMaxLineLength, "%s %d %d %d %d %d\n", @@ -305,6 +310,7 @@ void BoxChar::WriteTesseractBoxFile(const string& filename, int height, box->x + box->w, height - box->y, boxes[i]->page_); output.append(buffer, nbytes); } - File::WriteStringToFileOrDie(output, filename); + return output; } + } // namespace tesseract diff --git a/training/boxchar.h b/training/boxchar.h index 27b568a143..3748c4abad 100644 
--- a/training/boxchar.h +++ b/training/boxchar.h @@ -100,6 +100,9 @@ class BoxChar { // is needed to convert to tesseract coordinates. static void WriteTesseractBoxFile(const string& name, int height, const vector& boxes); + // Gets the tesseract box file as a string from the vector of boxes. + // The image height is needed to convert to tesseract coordinates. + static string GetTesseractBoxStr(int height, const vector& boxes); private: string ch_; diff --git a/training/cntraining.cpp b/training/cntraining.cpp index 916a758576..6f4f42aebe 100644 --- a/training/cntraining.cpp +++ b/training/cntraining.cpp @@ -52,8 +52,8 @@ int main ( Private Function Prototypes ----------------------------------------------------------------------------*/ -void WriteNormProtos (const char *Directory, LIST LabeledProtoList, - CLUSTERER *Clusterer); +void WriteNormProtos(const char *Directory, LIST LabeledProtoList, + const FEATURE_DESC_STRUCT *feature_desc); /* PARAMDESC *ConvertToPARAMDESC( @@ -160,13 +160,18 @@ int main(int argc, char *argv[]) { // reduce the min samples: // Config.MinSamples = 0.5 / num_fonts; pCharList = CharList; + // The norm protos will count the source protos, so we keep them here in + // freeable_protos, so they can be freed later. 
+ GenericVector freeable_protos; iterate(pCharList) { //Cluster - if (Clusterer) - FreeClusterer(Clusterer); CharSample = (LABELEDLIST)first_node(pCharList); Clusterer = SetUpForClustering(FeatureDefs, CharSample, PROGRAM_FEATURE_TYPE); + if (Clusterer == NULL) { // To avoid a SIGSEGV + fprintf(stderr, "Error: NULL clusterer!\n"); + return 1; + } float SavedMinSamples = Config.MinSamples; // To disable the tendency to produce a single cluster for all fonts, // make MagicSamples an impossible to achieve number: @@ -185,21 +190,21 @@ int main(int argc, char *argv[]) { } Config.MinSamples = SavedMinSamples; AddToNormProtosList(&NormProtoList, ProtoList, CharSample->Label); + freeable_protos.push_back(ProtoList); + FreeClusterer(Clusterer); } FreeTrainingSamples(CharList); - if (Clusterer == NULL) { // To avoid a SIGSEGV - fprintf(stderr, "Error: NULL clusterer!\n"); - return 1; - } - WriteNormProtos(FLAGS_D.c_str(), NormProtoList, Clusterer); + int desc_index = ShortNameToFeatureType(FeatureDefs, PROGRAM_FEATURE_TYPE); + WriteNormProtos(FLAGS_D.c_str(), NormProtoList, + FeatureDefs.FeatureDesc[desc_index]); FreeNormProtoList(NormProtoList); - FreeProtoList(&ProtoList); - FreeClusterer(Clusterer); + for (int i = 0; i < freeable_protos.size(); ++i) { + FreeProtoList(&freeable_protos[i]); + } printf ("\n"); return 0; } // main - /*---------------------------------------------------------------------------- Private Code ----------------------------------------------------------------------------*/ @@ -211,14 +216,13 @@ int main(int argc, char *argv[]) { * of the samples. * @param Directory directory to place sample files into * @param LabeledProtoList List of labeled protos -* @param Clusterer The CLUSTERER to use +* @param feature_desc Description of the features * @return none * @note Exceptions: none * @note History: Fri Aug 18 16:17:06 1989, DSJ, Created. 
*/ -void WriteNormProtos(const char *Directory, LIST LabeledProtoList, - CLUSTERER *Clusterer) -{ +void WriteNormProtos(const char *Directory, LIST LabeledProtoList, + const FEATURE_DESC_STRUCT *feature_desc) { FILE *File; STRING Filename; LABELEDLIST LabeledProto; @@ -233,8 +237,8 @@ void WriteNormProtos(const char *Directory, LIST LabeledProtoList, Filename += "normproto"; printf ("\nWriting %s ...", Filename.string()); File = Efopen (Filename.string(), "wb"); - fprintf(File, "%0d\n", Clusterer->SampleSize); - WriteParamDesc(File, Clusterer->SampleSize,Clusterer->ParamDesc); + fprintf(File, "%0d\n", feature_desc->NumParams); + WriteParamDesc(File, feature_desc->NumParams, feature_desc->ParamDesc); iterate(LabeledProtoList) { LabeledProto = (LABELEDLIST) first_node (LabeledProtoList); @@ -249,7 +253,7 @@ void WriteNormProtos(const char *Directory, LIST LabeledProtoList, exit(1); } fprintf(File, "\n%s %d\n", LabeledProto->Label, N); - WriteProtos(File, Clusterer->SampleSize, LabeledProto->List, true, false); + WriteProtos(File, feature_desc->NumParams, LabeledProto->List, true, false); } fclose (File); diff --git a/training/commontraining.cpp b/training/commontraining.cpp index b7243e6f3f..f77a553de4 100644 --- a/training/commontraining.cpp +++ b/training/commontraining.cpp @@ -40,7 +40,6 @@ using tesseract::CCUtil; using tesseract::IntFeatureSpace; -using tesseract::FontInfo; using tesseract::ParamUtils; using tesseract::ShapeTable; @@ -453,6 +452,7 @@ void FreeTrainingSamples(LIST CharList) { FEATURE_SET FeatureSet; LIST FeatureList; + LIST nodes = CharList; iterate(CharList) { /* iterate through all of the fonts */ char_sample = (LABELEDLIST) first_node(CharList); FeatureList = char_sample->List; @@ -462,7 +462,7 @@ void FreeTrainingSamples(LIST CharList) { } FreeLabeledList(char_sample); } - destroy(CharList); + destroy(nodes); } /* FreeTrainingSamples */ /*---------------------------------------------------------------------------*/ @@ -728,6 +728,7 @@ 
MERGE_CLASS NewLabeledClass(const char* Label) { void FreeLabeledClassList(LIST ClassList) { MERGE_CLASS MergeClass; + LIST nodes = ClassList; iterate(ClassList) /* iterate through all of the fonts */ { MergeClass = (MERGE_CLASS) first_node (ClassList); @@ -735,7 +736,7 @@ void FreeLabeledClassList(LIST ClassList) { FreeClass(MergeClass->Class); delete MergeClass; } - destroy(ClassList); + destroy(nodes); } /* FreeLabeledClassList */ @@ -825,12 +826,13 @@ void FreeNormProtoList(LIST CharList) { LABELEDLIST char_sample; + LIST nodes = CharList; iterate(CharList) /* iterate through all of the fonts */ { char_sample = (LABELEDLIST) first_node (CharList); FreeLabeledList (char_sample); } - destroy(CharList); + destroy(nodes); } // FreeNormProtoList diff --git a/training/degradeimage.cpp b/training/degradeimage.cpp index f9c3cfb048..333f3703dc 100644 --- a/training/degradeimage.cpp +++ b/training/degradeimage.cpp @@ -22,10 +22,36 @@ #include #include "allheaders.h" // from leptonica +#include "genericvector.h" #include "helpers.h" // For TRand. +#include "rect.h" namespace tesseract { +// A randomized perspective distortion can be applied to synthetic input. +// The perspective distortion comes from leptonica, which uses 2 sets of 4 +// corners to determine the distortion. There are random values for each of +// the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead +// defined in terms of a single shear value. This reduces the degrees of +// freedom enough to make the distortion more realistic than it would otherwise +// be if all 8 coordinates could move independently. +// One additional factor is used for the color of the pixels that don't exist +// in the source image. +// Name for each of the randomizing factors. +enum FactorNames { + FN_INCOLOR, + FN_Y0, + FN_Y1, + FN_Y2, + FN_Y3, + FN_X0, + FN_X1, + FN_SHEAR, + // x2 = x1 - shear + // x3 = x0 + shear + FN_NUM_FACTORS +}; + // Rotation is +/- kRotationRange radians. 
const float kRotationRange = 0.02f; // Number of grey levels to shift by for each exposure step. @@ -144,4 +170,141 @@ Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer, return input; } +// Creates and returns a Pix distorted by various means according to the bool +// flags. If boxes is not NULL, the boxes are resized/positioned according to +// any spatial distortion and also by the integer reduction factor box_scale +// so they will match what the network will output. +// Returns NULL on error. The returned Pix must be pixDestroyed. +Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert, + bool white_noise, bool smooth_noise, bool blur, + int box_reduction, TRand* randomizer, + GenericVector* boxes) { + Pix* distorted = pixCopy(NULL, const_cast(pix)); + // Things to do to synthetic training data. + if (invert && randomizer->SignedRand(1.0) < 0) + pixInvert(distorted, distorted); + if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) { + // TODO(rays) Cook noise in a more thread-safe manner than rand(). + // Attempt to make the sequences reproducible. + srand(randomizer->IntRand()); + Pix* pixn = pixAddGaussianNoise(distorted, 8.0); + pixDestroy(&distorted); + if (smooth_noise) { + distorted = pixBlockconv(pixn, 1, 1); + pixDestroy(&pixn); + } else { + distorted = pixn; + } + } + if (blur && randomizer->SignedRand(1.0) > 0.0) { + Pix* blurred = pixBlockconv(distorted, 1, 1); + pixDestroy(&distorted); + distorted = blurred; + } + if (perspective) + GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes); + if (boxes != NULL) { + for (int b = 0; b < boxes->size(); ++b) { + (*boxes)[b].scale(1.0f / box_reduction); + if ((*boxes)[b].width() <= 0) + (*boxes)[b].set_right((*boxes)[b].left() + 1); + } + } + return distorted; +} + +// Distorts anything that has a non-null pointer with the same pseudo-random +// perspective distortion. Width and height only need to be set if there +// is no pix. 
If there is a pix, then they will be taken from there. +void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer, + Pix** pix, GenericVector* boxes) { + if (pix != NULL && *pix != NULL) { + width = pixGetWidth(*pix); + height = pixGetHeight(*pix); + } + float* im_coeffs = NULL; + float* box_coeffs = NULL; + l_int32 incolor = + ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs); + if (pix != NULL && *pix != NULL) { + // Transform the image. + Pix* transformed = pixProjective(*pix, im_coeffs, incolor); + if (transformed == NULL) { + tprintf("Projective transformation failed!!\n"); + return; + } + pixDestroy(pix); + *pix = transformed; + } + if (boxes != NULL) { + // Transform the boxes. + for (int b = 0; b < boxes->size(); ++b) { + int x1, y1, x2, y2; + const TBOX& box = (*boxes)[b]; + projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1, + &y1); + projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(), + &x2, &y2); + TBOX new_box1(x1, height - y2, x2, height - y1); + projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(), + &x1, &y1); + projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2, + &y2); + TBOX new_box2(x1, height - y1, x2, height - y2); + (*boxes)[b] = new_box1.bounding_union(new_box2); + } + } + free(im_coeffs); + free(box_coeffs); +} + +// Computes the coefficients of a randomized projective transformation. +// The image transform requires backward transformation coefficient, and the +// box transform the forward coefficients. +// Returns the incolor arg to pixProjective. +int ProjectiveCoeffs(int width, int height, TRand* randomizer, + float** im_coeffs, float** box_coeffs) { + // Setup "from" points. + Pta* src_pts = ptaCreate(4); + ptaAddPt(src_pts, 0.0f, 0.0f); + ptaAddPt(src_pts, width, 0.0f); + ptaAddPt(src_pts, width, height); + ptaAddPt(src_pts, 0.0f, height); + // Extract factors from pseudo-random sequence. 
+ float factors[FN_NUM_FACTORS]; + float shear = 0.0f; // Shear is signed. + for (int i = 0; i < FN_NUM_FACTORS; ++i) { + // Everything is squared to make wild values rarer. + if (i == FN_SHEAR) { + // Shear is signed. + shear = randomizer->SignedRand(0.5 / 3.0); + shear = shear >= 0.0 ? shear * shear : -shear * shear; + // Keep the sheared points within the original rectangle. + if (shear < -factors[FN_X0]) shear = -factors[FN_X0]; + if (shear > factors[FN_X1]) shear = factors[FN_X1]; + factors[i] = shear; + } else if (i != FN_INCOLOR) { + factors[i] = fabs(randomizer->SignedRand(1.0)); + if (i <= FN_Y3) + factors[i] *= 5.0 / 8.0; + else + factors[i] *= 0.5; + factors[i] *= factors[i]; + } + } + // Setup "to" points. + Pta* dest_pts = ptaCreate(4); + ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height); + ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height); + ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width, + (1 - factors[FN_Y2]) * height); + ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width, + (1 - factors[FN_Y3]) * height); + getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs); + getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs); + ptaDestroy(&src_pts); + ptaDestroy(&dest_pts); + return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK; +} + } // namespace tesseract diff --git a/training/degradeimage.h b/training/degradeimage.h index 2add6282f8..a7af9565ff 100644 --- a/training/degradeimage.h +++ b/training/degradeimage.h @@ -20,12 +20,13 @@ #ifndef TESSERACT_TRAINING_DEGRADEIMAGE_H_ #define TESSERACT_TRAINING_DEGRADEIMAGE_H_ -struct Pix; +#include "allheaders.h" +#include "genericvector.h" +#include "helpers.h" // For TRand. +#include "rect.h" namespace tesseract { -class TRand; - // Degrade the pix as if by a print/copy/scan cycle with exposure > 0 // corresponding to darkening on the copier and <0 lighter and 0 not copied. 
// If rotation is not NULL, the clockwise rotation in radians is saved there. @@ -34,6 +35,27 @@ class TRand; struct Pix* DegradeImage(struct Pix* input, int exposure, TRand* randomizer, float* rotation); +// Creates and returns a Pix distorted by various means according to the bool +// flags. If boxes is not NULL, the boxes are resized/positioned according to +// any spatial distortion and also by the integer reduction factor box_scale +// so they will match what the network will output. +// Returns NULL on error. The returned Pix must be pixDestroyed. +Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert, + bool white_noise, bool smooth_noise, bool blur, + int box_reduction, TRand* randomizer, + GenericVector* boxes); +// Distorts anything that has a non-null pointer with the same pseudo-random +// perspective distortion. Width and height only need to be set if there +// is no pix. If there is a pix, then they will be taken from there. +void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer, + Pix** pix, GenericVector* boxes); +// Computes the coefficients of a randomized projective transformation. +// The image transform requires backward transformation coefficient, and the +// box transform the forward coefficients. +// Returns the incolor arg to pixProjective. +int ProjectiveCoeffs(int width, int height, TRand* randomizer, + float** im_coeffs, float** box_coeffs); + } // namespace tesseract #endif // TESSERACT_TRAINING_DEGRADEIMAGE_H_ diff --git a/training/fileio.cpp b/training/fileio.cpp index f82582da74..bb1f4afcef 100644 --- a/training/fileio.cpp +++ b/training/fileio.cpp @@ -81,7 +81,7 @@ bool File::ReadFileToString(const string& filename, string* out) { } string File::JoinPath(const string& prefix, const string& suffix) { - return (!prefix.size() || prefix[prefix.size() - 1] == '/') + return (prefix.empty() || prefix[prefix.size() - 1] == '/') ? 
prefix + suffix : prefix + "/" + suffix; } diff --git a/training/mftraining.cpp b/training/mftraining.cpp index 60314a1cdf..9e2e250927 100644 --- a/training/mftraining.cpp +++ b/training/mftraining.cpp @@ -64,9 +64,6 @@ #include "tprintf.h" #include "unicity_table.h" -using tesseract::Classify; -using tesseract::FontInfo; -using tesseract::FontSpacingInfo; using tesseract::IndexMapBiDi; using tesseract::MasterTrainer; using tesseract::Shape; @@ -305,6 +302,9 @@ int main (int argc, char **argv) { *shape_table, float_classes, inttemp_file.string(), pffmtable_file.string()); + for (int c = 0; c < unicharset->size(); ++c) { + FreeClassFields(&float_classes[c]); + } delete [] float_classes; FreeLabeledClassList(mf_classes); delete trainer; diff --git a/training/normstrngs.cpp b/training/normstrngs.cpp index acffeee13d..e7cac21f4b 100644 --- a/training/normstrngs.cpp +++ b/training/normstrngs.cpp @@ -113,12 +113,12 @@ bool is_double_quote(const char32 ch) { return false; } -STRING NormalizeUTF8String(const char* str8) { +STRING NormalizeUTF8String(bool decompose, const char* str8) { GenericVector str32, out_str32, norm_str; UTF8ToUTF32(str8, &str32); for (int i = 0; i < str32.length(); ++i) { norm_str.clear(); - NormalizeChar32(str32[i], &norm_str); + NormalizeChar32(str32[i], decompose, &norm_str); for (int j = 0; j < norm_str.length(); ++j) { out_str32.push_back(norm_str[j]); } @@ -128,10 +128,10 @@ STRING NormalizeUTF8String(const char* str8) { return out_str8; } -void NormalizeChar32(char32 ch, GenericVector* str) { +void NormalizeChar32(char32 ch, bool decompose, GenericVector* str) { IcuErrorCode error_code; const icu::Normalizer2* nfkc = icu::Normalizer2::getInstance( - NULL, "nfkc", UNORM2_COMPOSE, error_code); + NULL, "nfkc", decompose ? 
UNORM2_DECOMPOSE : UNORM2_COMPOSE, error_code); error_code.assertSuccess(); error_code.reset(); diff --git a/training/normstrngs.h b/training/normstrngs.h index 71e7b8da08..6fca3193ab 100644 --- a/training/normstrngs.h +++ b/training/normstrngs.h @@ -39,11 +39,16 @@ void UTF32ToUTF8(const GenericVector& str32, STRING* utf8_str); // assumption of this function is that the input is already as fully composed // as it can be, but may require some compatibility normalizations or just // OCR evaluation related normalizations. -void NormalizeChar32(char32 ch, GenericVector* str); +void NormalizeChar32(char32 ch, bool decompose, GenericVector* str); // Normalize a UTF8 string. Same as above, but for UTF8-encoded strings, that // can contain multiple UTF32 code points. -STRING NormalizeUTF8String(const char* str8); +STRING NormalizeUTF8String(bool decompose, const char* str8); +// Default behavior is to compose, until it is proven that decomposed benefits +// at least one language. +inline STRING NormalizeUTF8String(const char* str8) { + return NormalizeUTF8String(false, str8); +} // Apply just the OCR-specific normalizations and return the normalized char. char32 OCRNormalize(char32 ch); diff --git a/training/pango_font_info.cpp b/training/pango_font_info.cpp index 6ca8c8998f..41e352eae4 100644 --- a/training/pango_font_info.cpp +++ b/training/pango_font_info.cpp @@ -60,15 +60,6 @@ STRING_PARAM_FLAG(fontconfig_tmpdir, "/tmp", "Overrides fontconfig default temporary dir"); -BOOL_PARAM_FLAG(fontconfig_refresh_cache, false, - "Does a one-time deletion of cache files from the " - "fontconfig_tmpdir before initializing fontconfig."); -BOOL_PARAM_FLAG(fontconfig_refresh_config_file, true, - "Does a one-time reset of the fontconfig config file to point" - " to fonts_dir before initializing fontconfig. Set to true" - " if fontconfig_refresh_cache is true. 
Set it to false to use" - " multiple instances in separate processes without having to" - " rescan the fonts_dir, using a previously setup font cache"); #ifndef USE_STD_NAMESPACE #include "ocr/trainingdata/typesetting/legacy_fonts.h" @@ -91,7 +82,8 @@ namespace tesseract { // in pixels. const int kDefaultResolution = 300; -bool PangoFontInfo::fontconfig_initialized_ = false; +string PangoFontInfo::fonts_dir_; +string PangoFontInfo::cache_dir_; PangoFontInfo::PangoFontInfo() : desc_(NULL), resolution_(kDefaultResolution) { Clear(); @@ -119,6 +111,8 @@ void PangoFontInfo::Clear() { } } +PangoFontInfo::~PangoFontInfo() { pango_font_description_free(desc_); } + string PangoFontInfo::DescriptionName() const { if (!desc_) return ""; char* desc_str = pango_font_description_to_string(desc_); @@ -127,59 +121,63 @@ string PangoFontInfo::DescriptionName() const { return desc_name; } -// Initializes Fontconfig for use by writing a fake fonts.conf file into the -// FLAGS_fontconfigs_tmpdir directory, that points to the supplied -// fonts_dir, and then overrides the FONTCONFIG_PATH environment variable -// to point to this fonts.conf file. If force_clear, the cache is refreshed -// even if it has already been initialized. 
-void PangoFontInfo::InitFontConfig(bool force_clear, const string& fonts_dir) { - if ((fontconfig_initialized_ && !force_clear) || fonts_dir.empty()) { - fontconfig_initialized_ = true; - return; - } - if (FLAGS_fontconfig_refresh_cache || force_clear) { - File::DeleteMatchingFiles(File::JoinPath( - FLAGS_fontconfig_tmpdir.c_str(), "*cache-?").c_str()); - } - if (FLAGS_fontconfig_refresh_config_file || FLAGS_fontconfig_refresh_cache || - force_clear) { - const int MAX_FONTCONF_FILESIZE = 1024; - char fonts_conf_template[MAX_FONTCONF_FILESIZE]; - snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE, - "\n" - "\n" - "\n" - "%s\n" - "%s\n" - "\n" - "", fonts_dir.c_str(), - FLAGS_fontconfig_tmpdir.c_str()); - string fonts_conf_file = File::JoinPath(FLAGS_fontconfig_tmpdir.c_str(), - "fonts.conf"); - File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file); +// If not already initialized, initializes FontConfig by setting its +// environment variable and creating a fonts.conf file that points to the +// FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir. +/* static */ +void PangoFontInfo::SoftInitFontConfig() { + if (fonts_dir_.empty()) { + HardInitFontConfig(FLAGS_fonts_dir.c_str(), + FLAGS_fontconfig_tmpdir.c_str()); } +} + +// Re-initializes font config, whether or not already initialized. +// If already initialized, any existing cache is deleted, just to be sure. 
+/* static */ +void PangoFontInfo::HardInitFontConfig(const string& fonts_dir, + const string& cache_dir) { + if (!cache_dir_.empty()) { + File::DeleteMatchingFiles( + File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str()); + } + const int MAX_FONTCONF_FILESIZE = 1024; + char fonts_conf_template[MAX_FONTCONF_FILESIZE]; + cache_dir_ = cache_dir; + fonts_dir_ = fonts_dir; + snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE, + "\n" + "\n" + "\n" + "%s\n" + "%s\n" + "\n" + "", + fonts_dir.c_str(), cache_dir_.c_str()); + string fonts_conf_file = File::JoinPath(cache_dir_.c_str(), "fonts.conf"); + File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file); #ifdef _WIN32 std::string env("FONTCONFIG_PATH="); - env.append(FLAGS_fontconfig_tmpdir.c_str()); + env.append(cache_dir_.c_str()); putenv(env.c_str()); putenv("LANG=en_US.utf8"); #else - setenv("FONTCONFIG_PATH", FLAGS_fontconfig_tmpdir.c_str(), true); + setenv("FONTCONFIG_PATH", cache_dir_.c_str(), true); // Fix the locale so that the reported font names are consistent. setenv("LANG", "en_US.utf8", true); #endif // _WIN32 - if (!fontconfig_initialized_ || force_clear) { - if (FcInitReinitialize() != FcTrue) { - tprintf("FcInitiReinitialize failed!!\n"); - } + + if (FcInitReinitialize() != FcTrue) { + tprintf("FcInitiReinitialize failed!!\n"); } - fontconfig_initialized_ = true; FontUtils::ReInit(); + // Clear Pango's font cache too. + pango_cairo_font_map_set_default(NULL); } static void ListFontFamilies(PangoFontFamily*** families, int* n_families) { - PangoFontInfo::InitFontConfig(false, FLAGS_fonts_dir.c_str()); + PangoFontInfo::SoftInitFontConfig(); PangoFontMap* font_map = pango_cairo_font_map_get_default(); DISABLE_HEAP_LEAK_CHECK; pango_font_map_list_families(font_map, families, n_families); @@ -253,7 +251,7 @@ bool PangoFontInfo::ParseFontDescriptionName(const string& name) { // in the font map. 
Note that if the font is wholly missing, this could // correspond to a completely different font family and face. PangoFont* PangoFontInfo::ToPangoFont() const { - InitFontConfig(false, FLAGS_fonts_dir.c_str()); + SoftInitFontConfig(); PangoFontMap* font_map = pango_cairo_font_map_get_default(); PangoContext* context = pango_context_new(); pango_cairo_context_set_resolution(context, resolution_); @@ -437,10 +435,15 @@ bool PangoFontInfo::CanRenderString(const char* utf8_word, int len, PangoGlyph dotted_circle_glyph; PangoFont* font = run->item->analysis.font; - PangoGlyphString * glyphs = pango_glyph_string_new(); +#ifdef _WIN32 // Fixme! Leaks memory and breaks unittests. + PangoGlyphString* glyphs = pango_glyph_string_new(); char s[] = "\xc2\xa7"; pango_shape(s, sizeof(s), &(run->item->analysis), glyphs); dotted_circle_glyph = glyphs->glyphs[0].glyph; +#else + dotted_circle_glyph = pango_fc_font_get_glyph( + reinterpret_cast(font), kDottedCircleGlyph); +#endif if (TLOG_IS_ON(2)) { PangoFontDescription* desc = pango_font_describe(font); @@ -519,22 +522,21 @@ vector FontUtils::available_fonts_; // cache list bool FontUtils::IsAvailableFont(const char* input_query_desc, string* best_match) { string query_desc(input_query_desc); - if (PANGO_VERSION <= 12005) { - // Strip commas and any ' Medium' substring in the name. - query_desc.erase(std::remove(query_desc.begin(), query_desc.end(), ','), - query_desc.end()); - const string kMediumStr = " Medium"; - std::size_t found = query_desc.find(kMediumStr); - if (found != std::string::npos) { - query_desc.erase(found, kMediumStr.length()); - } +#if (PANGO_VERSION <= 12005) + // Strip commas and any ' Medium' substring in the name. 
+ query_desc.erase(std::remove(query_desc.begin(), query_desc.end(), ','), + query_desc.end()); + const string kMediumStr = " Medium"; + std::size_t found = query_desc.find(kMediumStr); + if (found != std::string::npos) { + query_desc.erase(found, kMediumStr.length()); } - +#endif PangoFontDescription *desc = pango_font_description_from_string( query_desc.c_str()); PangoFont* selected_font = NULL; { - PangoFontInfo::InitFontConfig(false, FLAGS_fonts_dir.c_str()); + PangoFontInfo::SoftInitFontConfig(); PangoFontMap* font_map = pango_cairo_font_map_get_default(); PangoContext* context = pango_context_new(); pango_context_set_font_map(context, font_map); @@ -589,7 +591,7 @@ static bool ShouldIgnoreFontFamilyName(const char* query) { // Outputs description names of available fonts. /* static */ const vector& FontUtils::ListAvailableFonts() { - if (available_fonts_.size()) { + if (!available_fonts_.empty()) { return available_fonts_; } #ifndef USE_STD_NAMESPACE @@ -687,8 +689,7 @@ void FontUtils::GetAllRenderableCharacters(const vector& fonts, /* static */ int FontUtils::FontScore(const TessHashMap& ch_map, - const string& fontname, - int* raw_score, + const string& fontname, int* raw_score, vector* ch_flags) { PangoFontInfo font_info; if (!font_info.ParseFontDescriptionName(fontname)) { diff --git a/training/pango_font_info.h b/training/pango_font_info.h index fc46fcf48b..09a43fab14 100644 --- a/training/pango_font_info.h +++ b/training/pango_font_info.h @@ -24,10 +24,16 @@ #include #include +#include "commandlineflags.h" #include "hashfn.h" #include "host.h" -#include "util.h" #include "pango/pango-font.h" +#include "pango/pango.h" +#include "pango/pangocairo.h" +#include "util.h" + +DECLARE_STRING_PARAM_FLAG(fonts_dir); +DECLARE_STRING_PARAM_FLAG(fontconfig_tmpdir); typedef signed int char32; @@ -44,6 +50,7 @@ class PangoFontInfo { DECORATIVE, }; PangoFontInfo(); + ~PangoFontInfo(); // Initialize from parsing a font description name, defined as a string of the // 
format: // "FamilyName [FaceName] [PointSize]" @@ -83,10 +90,14 @@ class PangoFontInfo { bool GetSpacingProperties(const string& utf8_char, int* x_bearing, int* x_advance) const; - // Initializes FontConfig by setting its environment variable and creating - // a fonts.conf file that points to the given fonts_dir. Once initialized, - // it is not re-initialized unless force_clear is true. - static void InitFontConfig(bool force_clear, const string& fonts_dir); + // If not already initialized, initializes FontConfig by setting its + // environment variable and creating a fonts.conf file that points to the + // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir. + static void SoftInitFontConfig(); + // Re-initializes font config, whether or not already initialized. + // If already initialized, any existing cache is deleted, just to be sure. + static void HardInitFontConfig(const string& fonts_dir, + const string& cache_dir); // Accessors string DescriptionName() const; @@ -130,8 +141,14 @@ class PangoFontInfo { int resolution_; // Fontconfig operates through an environment variable, so it intrinsically // cannot be thread-friendly, but you can serialize multiple independent - // font configurations by calling InitFontConfig(true, path). - static bool fontconfig_initialized_; + // font configurations by calling HardInitFontConfig(fonts_dir, cache_dir). + // These hold the last initialized values set by HardInitFontConfig or + // the first call to SoftInitFontConfig. + // Directory to be scanned for font files. + static string fonts_dir_; + // Directory to store the cache of font information. 
(Can be the same as + // fonts_dir_) + static string cache_dir_; private: PangoFontInfo(const PangoFontInfo&); diff --git a/training/stringrenderer.cpp b/training/stringrenderer.cpp index 66bbf7d28e..e7f9699f18 100644 --- a/training/stringrenderer.cpp +++ b/training/stringrenderer.cpp @@ -347,6 +347,11 @@ void StringRenderer::ClearBoxes() { boxaDestroy(&page_boxes_); } +string StringRenderer::GetBoxesStr() { + BoxChar::PrepareToWrite(&boxchars_); + return BoxChar::GetTesseractBoxStr(page_height_, boxchars_); +} + void StringRenderer::WriteAllBoxes(const string& filename) { BoxChar::PrepareToWrite(&boxchars_); BoxChar::WriteTesseractBoxFile(filename, page_height_, boxchars_); @@ -395,7 +400,7 @@ bool StringRenderer::GetClusterStrings(vector* cluster_text) { it != start_byte_to_text.end(); ++it) { cluster_text->push_back(it->second); } - return cluster_text->size(); + return !cluster_text->empty(); } // Merges an array of BoxChars into words based on the identification of @@ -495,7 +500,7 @@ void StringRenderer::ComputeClusterBoxes() { const int end_byte_index = cluster_start_to_end_index[start_byte_index]; string cluster_text = string(text + start_byte_index, end_byte_index - start_byte_index); - if (cluster_text.size() && cluster_text[0] == '\n') { + if (!cluster_text.empty() && cluster_text[0] == '\n') { tlog(2, "Skipping newlines at start of text.\n"); continue; } @@ -595,11 +600,12 @@ void StringRenderer::ComputeClusterBoxes() { all_boxes = boxaCreate(0); boxaAddBox(all_boxes, page_boxchars[i]->mutable_box(), L_CLONE); } - boxaGetExtent(all_boxes, NULL, NULL, &page_box); - boxaDestroy(&all_boxes); - if (page_boxes_ == NULL) - page_boxes_ = boxaCreate(0); - boxaAddBox(page_boxes_, page_box, L_INSERT); + if (all_boxes != NULL) { + boxaGetExtent(all_boxes, NULL, NULL, &page_box); + boxaDestroy(&all_boxes); + if (page_boxes_ == NULL) page_boxes_ = boxaCreate(0); + boxaAddBox(page_boxes_, page_box, L_INSERT); + } } diff --git a/training/unicharset_training_utils.cpp 
b/training/unicharset_training_utils.cpp index 10aaf0e6c3..efa3a22cd5 100644 --- a/training/unicharset_training_utils.cpp +++ b/training/unicharset_training_utils.cpp @@ -37,7 +37,8 @@ namespace tesseract { // Helper sets the character attribute properties and sets up the script table. // Does not set tops and bottoms. -void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) { +void SetupBasicProperties(bool report_errors, bool decompose, + UNICHARSET* unicharset) { for (int unichar_id = 0; unichar_id < unicharset->size(); ++unichar_id) { // Convert any custom ligatures. const char* unichar_str = unicharset->id_to_unichar(unichar_id); @@ -129,7 +130,7 @@ void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) { } // Record normalized version of this unichar. - STRING normed_str = tesseract::NormalizeUTF8String(unichar_str); + STRING normed_str = tesseract::NormalizeUTF8String(decompose, unichar_str); if (unichar_id != 0 && normed_str.length() > 0) { unicharset->set_normed(unichar_id, normed_str.c_str()); } else { @@ -158,7 +159,7 @@ void SetPropertiesForInputFile(const string& script_dir, // Set unichar properties tprintf("Setting unichar properties\n"); - SetupBasicProperties(true, &unicharset); + SetupBasicProperties(true, false, &unicharset); string xheights_str; for (int s = 0; s < unicharset.get_script_table_size(); ++s) { // Load the unicharset for the script if available. diff --git a/training/unicharset_training_utils.h b/training/unicharset_training_utils.h index ff2262875d..f03e12ace4 100644 --- a/training/unicharset_training_utils.h +++ b/training/unicharset_training_utils.h @@ -33,7 +33,13 @@ namespace tesseract { // Helper sets the character attribute properties and sets up the script table. // Does not set tops and bottoms. 
-void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset); +void SetupBasicProperties(bool report_errors, bool decompose, + UNICHARSET* unicharset); +// Default behavior is to compose, until it is proven that decomposed benefits +// at least one language. +inline void SetupBasicProperties(bool report_errors, UNICHARSET* unicharset) { + SetupBasicProperties(report_errors, false, unicharset); +} // Helper to set the properties for an input unicharset file, writes to the // output file. If an appropriate script unicharset can be found in the From c1d37120a5d6723c37470e72672c29d242c8cb4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 7 Dec 2016 15:55:27 +0100 Subject: [PATCH 054/132] backport from 4.00: imagedata --- ccstruct/imagedata.cpp | 402 ++++++++++++++++++++++++++++++----------- ccstruct/imagedata.h | 135 +++++++++++--- 2 files changed, 412 insertions(+), 125 deletions(-) diff --git a/ccstruct/imagedata.cpp b/ccstruct/imagedata.cpp index 3c244c7724..11e1c8629f 100644 --- a/ccstruct/imagedata.cpp +++ b/ccstruct/imagedata.cpp @@ -30,6 +30,14 @@ #include "helpers.h" #include "tprintf.h" +#if __cplusplus > 199711L // C++11 support + #include +#endif + +// Number of documents to read ahead while training. Doesn't need to be very +// large. +const int kMaxReadAhead = 8; + namespace tesseract { WordFeature::WordFeature() : x_(0), y_(0), dir_(0) { @@ -182,6 +190,19 @@ bool ImageData::DeSerialize(bool swap, TFile* fp) { return true; } +// As DeSerialize, but only seeks past the data - hence a static method. 
+bool ImageData::SkipDeSerialize(bool swap, TFile* fp) { + if (!STRING::SkipDeSerialize(swap, fp)) return false; + inT32 page_number; + if (fp->FRead(&page_number, sizeof(page_number), 1) != 1) return false; + if (!GenericVector::SkipDeSerialize(swap, fp)) return false; + if (!STRING::SkipDeSerialize(swap, fp)) return false; + if (!GenericVector::SkipDeSerialize(swap, fp)) return false; + if (!GenericVector::SkipDeSerializeClasses(swap, fp)) return false; + inT8 vertical = 0; + return fp->FRead(&vertical, sizeof(vertical), 1) == 1; +} + // Saves the given Pix as a PNG-encoded string and destroys it. void ImageData::SetPix(Pix* pix) { SetPixInternal(pix, &image_data_); @@ -195,37 +216,34 @@ Pix* ImageData::GetPix() const { // Gets anything and everything with a non-NULL pointer, prescaled to a // given target_height (if 0, then the original image height), and aligned. // Also returns (if not NULL) the width and height of the scaled image. -// The return value is the scale factor that was applied to the image to -// achieve the target_height. -float ImageData::PreScale(int target_height, Pix** pix, - int* scaled_width, int* scaled_height, - GenericVector* boxes) const { +// The return value is the scaled Pix, which must be pixDestroyed after use, +// and scale_factor (if not NULL) is set to the scale factor that was applied +// to the image to achieve the target_height. 
+Pix* ImageData::PreScale(int target_height, int max_height, float* scale_factor, + int* scaled_width, int* scaled_height, + GenericVector* boxes) const { int input_width = 0; int input_height = 0; Pix* src_pix = GetPix(); ASSERT_HOST(src_pix != NULL); input_width = pixGetWidth(src_pix); input_height = pixGetHeight(src_pix); - if (target_height == 0) - target_height = input_height; + if (target_height == 0) { + target_height = MIN(input_height, max_height); + } float im_factor = static_cast(target_height) / input_height; if (scaled_width != NULL) *scaled_width = IntCastRounded(im_factor * input_width); if (scaled_height != NULL) *scaled_height = target_height; - if (pix != NULL) { - // Get the scaled image. - pixDestroy(pix); - *pix = pixScale(src_pix, im_factor, im_factor); - if (*pix == NULL) { - tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", - input_width, input_height, im_factor); - } - if (scaled_width != NULL) - *scaled_width = pixGetWidth(*pix); - if (scaled_height != NULL) - *scaled_height = pixGetHeight(*pix); + // Get the scaled image. + Pix* pix = pixScale(src_pix, im_factor, im_factor); + if (pix == NULL) { + tprintf("Scaling pix of size %d, %d by factor %g made null pix!!\n", + input_width, input_height, im_factor); } + if (scaled_width != NULL) *scaled_width = pixGetWidth(pix); + if (scaled_height != NULL) *scaled_height = pixGetHeight(pix); pixDestroy(&src_pix); if (boxes != NULL) { // Get the boxes. @@ -241,7 +259,8 @@ float ImageData::PreScale(int target_height, Pix** pix, boxes->push_back(box); } } - return im_factor; + if (scale_factor != NULL) *scale_factor = im_factor; + return pix; } int ImageData::MemoryUsed() const { @@ -266,19 +285,20 @@ void ImageData::Display() const { // Draw the boxes. 
win->Pen(ScrollView::RED); win->Brush(ScrollView::NONE); - win->TextAttributes("Arial", kTextSize, false, false, false); - for (int b = 0; b < boxes_.size(); ++b) { - boxes_[b].plot(win); - win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string()); - TBOX scaled(boxes_[b]); - scaled.scale(256.0 / height); - scaled.plot(win); + int text_size = kTextSize; + if (!boxes_.empty() && boxes_[0].height() * 2 < text_size) + text_size = boxes_[0].height() * 2; + win->TextAttributes("Arial", text_size, false, false, false); + if (!boxes_.empty()) { + for (int b = 0; b < boxes_.size(); ++b) { + boxes_[b].plot(win); + win->Text(boxes_[b].left(), height + kTextSize, box_texts_[b].string()); + } + } else { + // The full transcription. + win->Pen(ScrollView::CYAN); + win->Text(0, height + kTextSize * 2, transcription_.string()); } - // The full transcription. - win->Pen(ScrollView::CYAN); - win->Text(0, height + kTextSize * 2, transcription_.string()); - // Add the features. - win->Pen(ScrollView::GREEN); win->Update(); window_wait(win); #endif @@ -340,27 +360,51 @@ bool ImageData::AddBoxes(const char* box_text) { return false; } +// Thread function to call ReCachePages. +void* ReCachePagesFunc(void* data) { + DocumentData* document_data = reinterpret_cast(data); + document_data->ReCachePages(); + return NULL; +} + DocumentData::DocumentData(const STRING& name) - : document_name_(name), pages_offset_(0), total_pages_(0), - memory_used_(0), max_memory_(0), reader_(NULL) {} + : document_name_(name), + pages_offset_(-1), + total_pages_(-1), + memory_used_(0), + max_memory_(0), + reader_(NULL) {} -DocumentData::~DocumentData() {} +DocumentData::~DocumentData() { + SVAutoLock lock_p(&pages_mutex_); + SVAutoLock lock_g(&general_mutex_); +} // Reads all the pages in the given lstmf filename to the cache. The reader // is used to read the file. 
bool DocumentData::LoadDocument(const char* filename, const char* lang, int start_page, inT64 max_memory, FileReader reader) { + SetDocument(filename, lang, max_memory, reader); + pages_offset_ = start_page; + return ReCachePages(); +} + +// Sets up the document, without actually loading it. +void DocumentData::SetDocument(const char* filename, const char* lang, + inT64 max_memory, FileReader reader) { + SVAutoLock lock_p(&pages_mutex_); + SVAutoLock lock(&general_mutex_); document_name_ = filename; lang_ = lang; - pages_offset_ = start_page; + pages_offset_ = -1; max_memory_ = max_memory; reader_ = reader; - return ReCachePages(); } // Writes all the pages to the given filename. Returns false on error. bool DocumentData::SaveDocument(const char* filename, FileWriter writer) { + SVAutoLock lock(&pages_mutex_); TFile fp; fp.OpenWrite(NULL); if (!pages_.Serialize(&fp) || !fp.CloseWrite(filename, writer)) { @@ -370,112 +414,169 @@ bool DocumentData::SaveDocument(const char* filename, FileWriter writer) { return true; } bool DocumentData::SaveToBuffer(GenericVector* buffer) { + SVAutoLock lock(&pages_mutex_); TFile fp; fp.OpenWrite(buffer); return pages_.Serialize(&fp); } +// Adds the given page data to this document, counting up memory. +void DocumentData::AddPageToDocument(ImageData* page) { + SVAutoLock lock(&pages_mutex_); + pages_.push_back(page); + set_memory_used(memory_used() + page->MemoryUsed()); +} + +// If the given index is not currently loaded, loads it using a separate +// thread. +void DocumentData::LoadPageInBackground(int index) { + ImageData* page = NULL; + if (IsPageAvailable(index, &page)) return; + SVAutoLock lock(&pages_mutex_); + if (pages_offset_ == index) return; + pages_offset_ = index; + pages_.clear(); + SVSync::StartThread(ReCachePagesFunc, this); +} + // Returns a pointer to the page with the given index, modulo the total -// number of pages, recaching if needed. +// number of pages. Blocks until the background load is completed. 
const ImageData* DocumentData::GetPage(int index) { - index = Modulo(index, total_pages_); - if (index < pages_offset_ || index >= pages_offset_ + pages_.size()) { - pages_offset_ = index; - if (!ReCachePages()) return NULL; + ImageData* page = NULL; + while (!IsPageAvailable(index, &page)) { + // If there is no background load scheduled, schedule one now. + pages_mutex_.Lock(); + bool needs_loading = pages_offset_ != index; + pages_mutex_.Unlock(); + if (needs_loading) LoadPageInBackground(index); + // We can't directly load the page, or the background load will delete it + // while the caller is using it, so give it a chance to work. +#if __cplusplus > 199711L // C++11 support + //TODO: We need to fix this for compilers without C++11 support (e.g. VS2010) + std::this_thread::sleep_for(std::chrono::seconds(1)); +#endif + } + return page; +} + +// Returns true if the requested page is available, and provides a pointer, +// which may be NULL if the document is empty. May block, even though it +// doesn't guarantee to return true. +bool DocumentData::IsPageAvailable(int index, ImageData** page) { + SVAutoLock lock(&pages_mutex_); + int num_pages = NumPages(); + if (num_pages == 0 || index < 0) { + *page = NULL; // Empty Document. + return true; + } + if (num_pages > 0) { + index = Modulo(index, num_pages); + if (pages_offset_ <= index && index < pages_offset_ + pages_.size()) { + *page = pages_[index - pages_offset_]; // Page is available already. + return true; + } } - return pages_[index - pages_offset_]; + return false; +} + +// Removes all pages from memory and frees the memory, but does not forget +// the document metadata. 
+inT64 DocumentData::UnCache() { + SVAutoLock lock(&pages_mutex_); + inT64 memory_saved = memory_used(); + pages_.clear(); + pages_offset_ = -1; + set_total_pages(-1); + set_memory_used(0); + tprintf("Unloaded document %s, saving %d memory\n", document_name_.string(), + memory_saved); + return memory_saved; } -// Loads as many pages can fit in max_memory_ starting at index pages_offset_. +// Locks the pages_mutex_ and Loads as many pages can fit in max_memory_ +// starting at index pages_offset_. bool DocumentData::ReCachePages() { + SVAutoLock lock(&pages_mutex_); // Read the file. + set_total_pages(0); + set_memory_used(0); + int loaded_pages = 0; + pages_.truncate(0); TFile fp; - if (!fp.Open(document_name_, reader_)) return false; - memory_used_ = 0; - if (!pages_.DeSerialize(false, &fp)) { - tprintf("Deserialize failed: %s\n", document_name_.string()); - pages_.truncate(0); + if (!fp.Open(document_name_, reader_) || + !PointerVector::DeSerializeSize(false, &fp, &loaded_pages) || + loaded_pages <= 0) { + tprintf("Deserialize header failed: %s\n", document_name_.string()); return false; } - total_pages_ = pages_.size(); - pages_offset_ %= total_pages_; - // Delete pages before the first one we want, and relocate the rest. + pages_offset_ %= loaded_pages; + // Skip pages before the first one we want, and load the rest until max + // memory and skip the rest after that. int page; - for (page = 0; page < pages_.size(); ++page) { - if (page < pages_offset_) { - delete pages_[page]; - pages_[page] = NULL; + for (page = 0; page < loaded_pages; ++page) { + if (page < pages_offset_ || + (max_memory_ > 0 && memory_used() > max_memory_)) { + if (!PointerVector::DeSerializeSkip(false, &fp)) break; } else { - ImageData* image_data = pages_[page]; - if (max_memory_ > 0 && page > pages_offset_ && - memory_used_ + image_data->MemoryUsed() > max_memory_) - break; // Don't go over memory quota unless the first image. 
+ if (!pages_.DeSerializeElement(false, &fp)) break; + ImageData* image_data = pages_.back(); if (image_data->imagefilename().length() == 0) { image_data->set_imagefilename(document_name_); image_data->set_page_number(page); } image_data->set_language(lang_); - memory_used_ += image_data->MemoryUsed(); - if (pages_offset_ != 0) { - pages_[page - pages_offset_] = image_data; - pages_[page] = NULL; - } + set_memory_used(memory_used() + image_data->MemoryUsed()); } } - pages_.truncate(page - pages_offset_); - tprintf("Loaded %d/%d pages (%d-%d) of document %s\n", - pages_.size(), total_pages_, pages_offset_, - pages_offset_ + pages_.size(), document_name_.string()); + if (page < loaded_pages) { + tprintf("Deserialize failed: %s read %d/%d pages\n", + document_name_.string(), page, loaded_pages); + pages_.truncate(0); + } else { + tprintf("Loaded %d/%d pages (%d-%d) of document %s\n", pages_.size(), + loaded_pages, pages_offset_, pages_offset_ + pages_.size(), + document_name_.string()); + } + set_total_pages(loaded_pages); return !pages_.empty(); } -// Adds the given page data to this document, counting up memory. -void DocumentData::AddPageToDocument(ImageData* page) { - pages_.push_back(page); - memory_used_ += page->MemoryUsed(); -} - // A collection of DocumentData that knows roughly how much memory it is using. DocumentCache::DocumentCache(inT64 max_memory) - : total_pages_(0), memory_used_(0), max_memory_(max_memory) {} + : num_pages_per_doc_(0), max_memory_(max_memory) {} DocumentCache::~DocumentCache() {} // Adds all the documents in the list of filenames, counting memory. // The reader is used to read the files. 
bool DocumentCache::LoadDocuments(const GenericVector& filenames, - const char* lang, FileReader reader) { - inT64 fair_share_memory = max_memory_ / filenames.size(); + const char* lang, + CachingStrategy cache_strategy, + FileReader reader) { + cache_strategy_ = cache_strategy; + inT64 fair_share_memory = 0; + // In the round-robin case, each DocumentData handles restricting its content + // to its fair share of memory. In the sequential case, DocumentCache + // determines which DocumentDatas are held entirely in memory. + if (cache_strategy_ == CS_ROUND_ROBIN) + fair_share_memory = max_memory_ / filenames.size(); for (int arg = 0; arg < filenames.size(); ++arg) { STRING filename = filenames[arg]; DocumentData* document = new DocumentData(filename); - if (document->LoadDocument(filename.string(), lang, 0, - fair_share_memory, reader)) { - AddToCache(document); - } else { - tprintf("Failed to load image %s!\n", filename.string()); - delete document; - } + document->SetDocument(filename.string(), lang, fair_share_memory, reader); + AddToCache(document); } - tprintf("Loaded %d pages, total %gMB\n", - total_pages_, memory_used_ / 1048576.0); - return total_pages_ > 0; + if (!documents_.empty()) { + // Try to get the first page now to verify the list of filenames. + if (GetPageBySerial(0) != NULL) return true; + tprintf("Load of page 0 failed!\n"); + } + return false; } -// Adds document to the cache, throwing out other documents if needed. +// Adds document to the cache. bool DocumentCache::AddToCache(DocumentData* data) { inT64 new_memory = data->memory_used(); - memory_used_ += new_memory; documents_.push_back(data); - total_pages_ += data->NumPages(); - // Delete the first item in the array, and other pages of the same name - // while memory is full. 
- while (memory_used_ >= max_memory_ && max_memory_ > 0) { - tprintf("Memory used=%lld vs max=%lld, discarding doc of size %lld\n", - memory_used_ , max_memory_, documents_[0]->memory_used()); - memory_used_ -= documents_[0]->memory_used(); - total_pages_ -= documents_[0]->NumPages(); - documents_.remove(0); - } return true; } @@ -488,11 +589,104 @@ DocumentData* DocumentCache::FindDocument(const STRING& document_name) const { return NULL; } +// Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache +// strategy, could take a long time. +int DocumentCache::TotalPages() { + if (cache_strategy_ == CS_SEQUENTIAL) { + // In sequential mode, we assume each doc has the same number of pages + // whether it is true or not. + if (num_pages_per_doc_ == 0) GetPageSequential(0); + return num_pages_per_doc_ * documents_.size(); + } + int total_pages = 0; + int num_docs = documents_.size(); + for (int d = 0; d < num_docs; ++d) { + // We have to load a page to make NumPages() valid. + documents_[d]->GetPage(0); + total_pages += documents_[d]->NumPages(); + } + return total_pages; +} + // Returns a page by serial number, selecting them in a round-robin fashion -// from all the documents. -const ImageData* DocumentCache::GetPageBySerial(int serial) { - int document_index = serial % documents_.size(); - return documents_[document_index]->GetPage(serial / documents_.size()); +// from all the documents. Highly disk-intensive, but doesn't need samples +// to be shuffled between files to begin with. 
+const ImageData* DocumentCache::GetPageRoundRobin(int serial) { + int num_docs = documents_.size(); + int doc_index = serial % num_docs; + const ImageData* doc = documents_[doc_index]->GetPage(serial / num_docs); + for (int offset = 1; offset <= kMaxReadAhead && offset < num_docs; ++offset) { + doc_index = (serial + offset) % num_docs; + int page = (serial + offset) / num_docs; + documents_[doc_index]->LoadPageInBackground(page); + } + return doc; +} + +// Returns a page by serial number, selecting them in sequence from each file. +// Requires the samples to be shuffled between the files to give a random or +// uniform distribution of data. Less disk-intensive than GetPageRoundRobin. +const ImageData* DocumentCache::GetPageSequential(int serial) { + int num_docs = documents_.size(); + ASSERT_HOST(num_docs > 0); + if (num_pages_per_doc_ == 0) { + // Use the pages in the first doc as the number of pages in each doc. + documents_[0]->GetPage(0); + num_pages_per_doc_ = documents_[0]->NumPages(); + if (num_pages_per_doc_ == 0) { + tprintf("First document cannot be empty!!\n"); + ASSERT_HOST(num_pages_per_doc_ > 0); + } + // Get rid of zero now if we don't need it. + if (serial / num_pages_per_doc_ % num_docs > 0) documents_[0]->UnCache(); + } + int doc_index = serial / num_pages_per_doc_ % num_docs; + const ImageData* doc = + documents_[doc_index]->GetPage(serial % num_pages_per_doc_); + // Count up total memory. Background loading makes it more complicated to + // keep a running count. + inT64 total_memory = 0; + for (int d = 0; d < num_docs; ++d) { + total_memory += documents_[d]->memory_used(); + } + if (total_memory >= max_memory_) { + // Find something to un-cache. + // If there are more than 3 in front, then serial is from the back reader + // of a pair of readers. If we un-cache from in-front-2 to 2-ahead, then + // we create a hole between them and then un-caching the backmost occupied + // will work for both. 
+ int num_in_front = CountNeighbourDocs(doc_index, 1); + for (int offset = num_in_front - 2; + offset > 1 && total_memory >= max_memory_; --offset) { + int next_index = (doc_index + offset) % num_docs; + total_memory -= documents_[next_index]->UnCache(); + } + // If that didn't work, the best solution is to un-cache from the back. If + // we take away the document that a 2nd reader is using, it will put it + // back and make a hole between. + int num_behind = CountNeighbourDocs(doc_index, -1); + for (int offset = num_behind; offset < 0 && total_memory >= max_memory_; + ++offset) { + int next_index = (doc_index + offset + num_docs) % num_docs; + total_memory -= documents_[next_index]->UnCache(); + } + } + int next_index = (doc_index + 1) % num_docs; + if (!documents_[next_index]->IsCached() && total_memory < max_memory_) { + documents_[next_index]->LoadPageInBackground(0); + } + return doc; +} + +// Helper counts the number of adjacent cached neighbours of index looking in +// direction dir, ie index+dir, index+2*dir etc. +int DocumentCache::CountNeighbourDocs(int index, int dir) { + int num_docs = documents_.size(); + for (int offset = dir; abs(offset) < num_docs; offset += dir) { + int offset_index = (index + offset + num_docs) % num_docs; + if (!documents_[offset_index]->IsCached()) return offset - dir; + } + return num_docs; } } // namespace tesseract. diff --git a/ccstruct/imagedata.h b/ccstruct/imagedata.h index 6321f121b1..ae6722934e 100644 --- a/ccstruct/imagedata.h +++ b/ccstruct/imagedata.h @@ -25,6 +25,7 @@ #include "normalis.h" #include "rect.h" #include "strngs.h" +#include "svutil.h" struct Pix; @@ -34,8 +35,22 @@ namespace tesseract { const int kFeaturePadding = 2; // Number of pixels to pad around text boxes. const int kImagePadding = 4; -// Number of training images to combine into a mini-batch for training. -const int kNumPagesPerMiniBatch = 100; + +// Enum to determine the caching and data sequencing strategy. 
+enum CachingStrategy { + // Reads all of one file before moving on to the next. Requires samples to be + // shuffled across files. Uses the count of samples in the first file as + // the count in all the files to achieve high-speed random access. As a + // consequence, if subsequent files are smaller, they get entries used more + // than once, and if subsequent files are larger, some entries are not used. + // Best for larger data sets that don't fit in memory. + CS_SEQUENTIAL, + // Reads one sample from each file in rotation. Does not require shuffled + // samples, but is extremely disk-intensive. Samples in smaller files also + // get used more often than samples in larger files. + // Best for smaller data sets that mostly fit in memory. + CS_ROUND_ROBIN, +}; class WordFeature { public: @@ -103,6 +118,8 @@ class ImageData { // Reads from the given file. Returns false in case of error. // If swap is true, assumes a big/little-endian swap is needed. bool DeSerialize(bool swap, TFile* fp); + // As DeSerialize, but only seeks past the data - hence a static method. + static bool SkipDeSerialize(bool swap, tesseract::TFile* fp); // Other accessors. const STRING& imagefilename() const { @@ -145,11 +162,12 @@ class ImageData { // Gets anything and everything with a non-NULL pointer, prescaled to a // given target_height (if 0, then the original image height), and aligned. // Also returns (if not NULL) the width and height of the scaled image. - // The return value is the scale factor that was applied to the image to - // achieve the target_height. - float PreScale(int target_height, Pix** pix, - int* scaled_width, int* scaled_height, - GenericVector* boxes) const; + // The return value is the scaled Pix, which must be pixDestroyed after use, + // and scale_factor (if not NULL) is set to the scale factor that was applied + // to the image to achieve the target_height. 
+ Pix* PreScale(int target_height, int max_height, float* scale_factor, + int* scaled_width, int* scaled_height, + GenericVector* boxes) const; int MemoryUsed() const; @@ -184,6 +202,8 @@ class ImageData { // A collection of ImageData that knows roughly how much memory it is using. class DocumentData { + friend void* ReCachePagesFunc(void* data); + public: explicit DocumentData(const STRING& name); ~DocumentData(); @@ -192,6 +212,9 @@ class DocumentData { // is used to read the file. bool LoadDocument(const char* filename, const char* lang, int start_page, inT64 max_memory, FileReader reader); + // Sets up the document, without actually loading it. + void SetDocument(const char* filename, const char* lang, inT64 max_memory, + FileReader reader); // Writes all the pages to the given filename. Returns false on error. bool SaveDocument(const char* filename, FileWriter writer); bool SaveToBuffer(GenericVector* buffer); @@ -200,26 +223,62 @@ class DocumentData { void AddPageToDocument(ImageData* page); const STRING& document_name() const { + SVAutoLock lock(&general_mutex_); return document_name_; } int NumPages() const { + SVAutoLock lock(&general_mutex_); return total_pages_; } inT64 memory_used() const { + SVAutoLock lock(&general_mutex_); return memory_used_; } + // If the given index is not currently loaded, loads it using a separate + // thread. Note: there are 4 cases: + // Document uncached: IsCached() returns false, total_pages_ < 0. + // Required page is available: IsPageAvailable returns true. In this case, + // total_pages_ > 0 and + // pages_offset_ <= index%total_pages_ <= pages_offset_+pages_.size() + // Pages are loaded, but the required one is not. + // The requested page is being loaded by LoadPageInBackground. In this case, + // index == pages_offset_. Once the loading starts, the pages lock is held + // until it completes, at which point IsPageAvailable will unblock and return + // true. 
+ void LoadPageInBackground(int index); // Returns a pointer to the page with the given index, modulo the total - // number of pages, recaching if needed. + // number of pages. Blocks until the background load is completed. const ImageData* GetPage(int index); + // Returns true if the requested page is available, and provides a pointer, + // which may be NULL if the document is empty. May block, even though it + // doesn't guarantee to return true. + bool IsPageAvailable(int index, ImageData** page); // Takes ownership of the given page index. The page is made NULL in *this. ImageData* TakePage(int index) { + SVAutoLock lock(&pages_mutex_); ImageData* page = pages_[index]; pages_[index] = NULL; return page; } + // Returns true if the document is currently loaded or in the process of + // loading. + bool IsCached() const { return NumPages() >= 0; } + // Removes all pages from memory and frees the memory, but does not forget + // the document metadata. Returns the memory saved. + inT64 UnCache(); private: - // Loads as many pages can fit in max_memory_ starting at index pages_offset_. + // Sets the value of total_pages_ behind a mutex. + void set_total_pages(int total) { + SVAutoLock lock(&general_mutex_); + total_pages_ = total; + } + void set_memory_used(inT64 memory_used) { + SVAutoLock lock(&general_mutex_); + memory_used_ = memory_used; + } + // Locks the pages_mutex_ and Loads as many pages can fit in max_memory_ + // starting at index pages_offset_. bool ReCachePages(); private: @@ -239,43 +298,77 @@ class DocumentData { inT64 max_memory_; // Saved reader from LoadDocument to allow re-caching. FileReader reader_; + // Mutex that protects pages_ and pages_offset_ against multiple parallel + // loads, and provides a wait for page. + SVMutex pages_mutex_; + // Mutex that protects other data members that callers want to access without + // waiting for a load operation. 
+ mutable SVMutex general_mutex_; }; // A collection of DocumentData that knows roughly how much memory it is using. +// Note that while it supports background read-ahead, it assumes that a single +// thread is accessing documents, ie it is not safe for multiple threads to +// access different documents in parallel, as one may de-cache the other's +// content. class DocumentCache { public: explicit DocumentCache(inT64 max_memory); ~DocumentCache(); + // Deletes all existing documents from the cache. + void Clear() { + documents_.clear(); + num_pages_per_doc_ = 0; + } // Adds all the documents in the list of filenames, counting memory. // The reader is used to read the files. bool LoadDocuments(const GenericVector& filenames, const char* lang, - FileReader reader); + CachingStrategy cache_strategy, FileReader reader); - // Adds document to the cache, throwing out other documents if needed. + // Adds document to the cache. bool AddToCache(DocumentData* data); // Finds and returns a document by name. DocumentData* FindDocument(const STRING& document_name) const; - // Returns a page by serial number, selecting them in a round-robin fashion - // from all the documents. - const ImageData* GetPageBySerial(int serial); + // Returns a page by serial number using the current cache_strategy_ to + // determine the mapping from serial number to page. + const ImageData* GetPageBySerial(int serial) { + if (cache_strategy_ == CS_SEQUENTIAL) + return GetPageSequential(serial); + else + return GetPageRoundRobin(serial); + } const PointerVector& documents() const { return documents_; } - int total_pages() const { - return total_pages_; - } + // Returns the total number of pages in an epoch. For CS_ROUND_ROBIN cache + // strategy, could take a long time. + int TotalPages(); private: + // Returns a page by serial number, selecting them in a round-robin fashion + // from all the documents. Highly disk-intensive, but doesn't need samples + // to be shuffled between files to begin with. 
+ const ImageData* GetPageRoundRobin(int serial); + // Returns a page by serial number, selecting them in sequence from each file. + // Requires the samples to be shuffled between the files to give a random or + // uniform distribution of data. Less disk-intensive than GetPageRoundRobin. + const ImageData* GetPageSequential(int serial); + + // Helper counts the number of adjacent cached neighbour documents_ of index + // looking in direction dir, ie index+dir, index+2*dir etc. + int CountNeighbourDocs(int index, int dir); + // A group of pages that corresponds in some loose way to a document. PointerVector documents_; - // Total of all pages. - int total_pages_; - // Total of all memory used by the cache. - inT64 memory_used_; + // Strategy to use for caching and serializing data samples. + CachingStrategy cache_strategy_; + // Number of pages in the first document, used as a divisor in + // GetPageSequential to determine the document index. + int num_pages_per_doc_; // Max memory allowed in this cache. 
inT64 max_memory_; }; From 46a84e761ff83e96c28611e1e76038f50e561a5a Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 25 Nov 2016 17:01:55 +0100 Subject: [PATCH 055/132] opencl: Clean whitespace issues in OpenCL kernel code * Remove whitespace at line endings * Replace tabs by spaces Signed-off-by: Stefan Weil --- opencl/oclkernels.h | 170 ++++++++++++++++++++++---------------------- 1 file changed, 85 insertions(+), 85 deletions(-) diff --git a/opencl/oclkernels.h b/opencl/oclkernels.h index f3912e1978..39de265573 100644 --- a/opencl/oclkernels.h +++ b/opencl/oclkernels.h @@ -58,7 +58,7 @@ KERNEL( ) KERNEL( -\n__kernel void pixSubtract(__global int *dword, __global int *sword, +\n__kernel void pixSubtract(__global int *dword, __global int *sword, const int wpl, const int h, __global int *outword) { const unsigned int row = get_global_id(1); @@ -113,15 +113,15 @@ KERNEL( unsigned int prevword, nextword, currword,tempword; unsigned int destword; const int col = pos % wpl; - + //Ignore the execss if (pos >= (wpl * h)) return; - - - currword = *(sword + pos); + + + currword = *(sword + pos); destword = currword; - + //Handle boundary conditions if(col==0) prevword=0; @@ -132,9 +132,9 @@ KERNEL( nextword=0; else nextword = *(sword + pos + 1); - + //Loop unrolled - + //1 bit to left and 1 bit to right //Get the max value on LHS of every pixel tempword = (prevword << (31)) | ((currword >> 1)); @@ -150,10 +150,10 @@ KERNEL( //Get max value on RHS of every pixel tempword = (currword << 2) | (nextword >> (30)); destword |= tempword; - - + + *(dword + pos) = destword; - + }\n ) @@ -208,7 +208,7 @@ KERNEL( unsigned int destword, tempword, lastword, currword; unsigned int lnextword, lprevword, rnextword, rprevword, firstword, secondword; int i, j, siter, eiter; - + //Ignore the execss if (pos >= (wpl*h) || (xn < 1 && xp < 1)) return; @@ -233,7 +233,7 @@ KERNEL( firstword = 0x0; else firstword = *(sword + pos - 1); - + //Get next word if (col == (wpl - 1)) secondword = 0x0; 
@@ -245,7 +245,7 @@ KERNEL( { //Get the max value on LHS of every pixel tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 0x0 : (firstword << (32-i)) | ((currword >> i)); - + destword |= tempword; //Get max value on RHS of every pixel @@ -274,11 +274,11 @@ KERNEL( else firstword = *(sword + row*wpl + siter); - if (eiter >= wpl) + if (eiter >= wpl) lastword = 0x0; else lastword = *(sword + row*wpl + eiter); - + for ( i = 1; i < nwords; i++) { //Gets LHS words @@ -288,14 +288,14 @@ KERNEL( secondword = *(sword + row*wpl + siter + i); lprevword = firstword << (32 - parbitsxn) | secondword >> parbitsxn; - + firstword = secondword; if ((siter + i + 1) < 0) secondword = 0x0; else secondword = *(sword + row*wpl + siter + i + 1); - + lnextword = firstword << (32 - parbitsxn) | secondword >> parbitsxn; //Gets RHS words @@ -303,7 +303,7 @@ KERNEL( firstword = 0x0; else firstword = *(sword + row*wpl + eiter - i); - + rnextword = firstword << parbitsxp | lastword >> (32 - parbitsxp); lastword = firstword; @@ -333,7 +333,7 @@ KERNEL( lastword = firstword; firstword = secondword; } - + *(dword + pos) = destword; }\n ) @@ -350,14 +350,14 @@ KERNEL( unsigned int prevword, nextword, currword,tempword; unsigned int destword; int i; - + //Ignore the execss if (pos >= (wpl * h)) return; - currword = *(sword + pos); + currword = *(sword + pos); destword = currword; - + //Handle boundary conditions if(col==0) prevword=0; @@ -368,7 +368,7 @@ KERNEL( nextword=0; else nextword = *(sword + pos + 1); - + for (i = 1; i <= halfwidth; i++) { //Get the max value on LHS of every pixel @@ -385,7 +385,7 @@ KERNEL( //Get max value on RHS of every pixel tempword = (currword << i) | (nextword >> (32 - i)); - + destword |= tempword; } @@ -405,7 +405,7 @@ KERNEL( unsigned int tempword; unsigned int destword; int i, siter, eiter; - + //Ignore the execss if (row >= h || col >= wpl) return; @@ -435,27 +435,27 @@ KERNEL( unsigned int prevword, nextword, currword,tempword; unsigned int destword; 
const int col = pos % wpl; - + //Ignore the execss if (pos >= (wpl * h)) return; - - currword = *(sword + pos); + + currword = *(sword + pos); destword = currword; - + //Handle boundary conditions if(col==0) prevword=0xffffffff; else prevword = *(sword + pos - 1); - + if(col==(wpl - 1)) nextword=0xffffffff; else nextword = *(sword + pos + 1); - + //Loop unrolled - + //1 bit to left and 1 bit to right //Get the min value on LHS of every pixel tempword = (prevword << (31)) | ((currword >> 1)); @@ -471,10 +471,10 @@ KERNEL( //Get min value on RHS of every pixel tempword = (currword << 2) | (nextword >> (30)); destword &= tempword; - - + + *(dword + pos) = destword; - + }\n ) @@ -499,7 +499,7 @@ KERNEL( if (row < 2 || row >= (h - 2)) { destword = 0x0; - } + } else { //2 words above @@ -526,7 +526,7 @@ KERNEL( tempword = *(sword + i*wpl + col); destword &= tempword; - if (col == 0) + if (col == 0) { destword &= fwmask; } @@ -542,7 +542,7 @@ KERNEL( ) KERNEL( -\n__kernel void morphoErodeHor(__global int *sword,__global int *dword, const int xp, const int xn, const int wpl, +\n__kernel void morphoErodeHor(__global int *sword,__global int *dword, const int xp, const int xn, const int wpl, const int h, const char isAsymmetric, const int rwmask, const int lwmask) { const int col = get_global_id(0); @@ -577,7 +577,7 @@ KERNEL( firstword = 0xffffffff; else firstword = *(sword + pos - 1); - + //Get next word if (col == (wpl - 1)) secondword = 0xffffffff; @@ -593,7 +593,7 @@ KERNEL( //Get max value on RHS of every pixel tempword = ((i == parbitsxp) && (parbitsxp != parbitsxn)) ? 
0xffffffff : (currword << i) | (secondword >> (32 - i)); - + //tempword = (currword << i) | (secondword >> (32 - i)); destword &= tempword; } @@ -622,18 +622,18 @@ KERNEL( *(dword + pos) = destword; return; } - + if (siter < 0) firstword = 0xffffffff; else firstword = *(sword + row*wpl + siter); - if (eiter >= wpl) + if (eiter >= wpl) lastword = 0xffffffff; else lastword = *(sword + row*wpl + eiter); - - + + for ( i = 1; i < nwords; i++) { //Gets LHS words @@ -643,14 +643,14 @@ KERNEL( secondword = *(sword + row*wpl + siter + i); lprevword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp); - + firstword = secondword; if ((siter + i + 1) < 0) secondword = 0xffffffff; else secondword = *(sword + row*wpl + siter + i + 1); - + lnextword = firstword << (32 - parbitsxp) | secondword >> (parbitsxp); //Gets RHS words @@ -658,7 +658,7 @@ KERNEL( firstword = 0xffffffff; else firstword = *(sword + row*wpl + eiter - i); - + rnextword = firstword << parbitsxn | lastword >> (32 - parbitsxn); lastword = firstword; @@ -688,7 +688,7 @@ KERNEL( lastword = firstword; firstword = secondword; } - + if (isAsymmetric) { //Clear boundary pixels @@ -708,8 +708,8 @@ KERNEL( KERNEL( \n__kernel void morphoErodeHor_32word(__global int *sword,__global int *dword, - const int halfwidth, const int wpl, - const int h, const char clearBoundPixH, + const int halfwidth, const int wpl, + const int h, const char clearBoundPixH, const int rwmask, const int lwmask, const char isEven) { @@ -723,25 +723,25 @@ KERNEL( if (pos >= (wpl * h)) return; - currword = *(sword + pos); + currword = *(sword + pos); destword = currword; - + //Handle boundary conditions if(col==0) prevword=0xffffffff; else prevword = *(sword + pos - 1); - + if(col==(wpl - 1)) nextword=0xffffffff; else nextword = *(sword + pos + 1); - + for (i = 1; i <= halfwidth; i++) { //Get the min value on LHS of every pixel tempword = (prevword << (32-i)) | ((currword >> i)); - + destword &= tempword; //Get min value on RHS of every pixel 
@@ -759,7 +759,7 @@ KERNEL( if (clearBoundPixH) { - if (col == 0) + if (col == 0) { destword &= rwmask; } @@ -775,7 +775,7 @@ KERNEL( KERNEL( \n__kernel void morphoErodeVer(__global int *sword,__global int *dword, - const int yp, + const int yp, const int wpl, const int h, const char clearBoundPixV, const int yn) { @@ -784,7 +784,7 @@ KERNEL( const unsigned int pos = row * wpl + col; unsigned int tempword, destword; int i, siter, eiter; - + //Ignore the execss if (row >= h || col >= wpl) return; @@ -804,7 +804,7 @@ KERNEL( //Clear boundary pixels if (clearBoundPixV && ((row < yp) || ((h - row) <= yn))) - { + { destword = 0x0; } @@ -892,23 +892,23 @@ KERNEL( \n __global const uchar* data, \n uint numPixels, \n __global uint *histBuffer) { // each wg will write HIST_SIZE*NUM_CHANNELS into this result; cpu will accumulate across wg's -\n +\n \n /* declare variables */ -\n +\n \n // work indices \n size_t groupId = get_group_id(0); \n size_t localId = get_local_id(0); // 0 -> 256-1 \n size_t globalId = get_global_id(0); // 0 -> 8*10*256-1=20480-1 \n uint numThreads = get_global_size(0); -\n +\n \n /* accumulate in global memory */ \n for ( uint pc = get_global_id(0); pc < numPixels; pc += get_global_size(0) ) { \n uchar value = data[ pc ]; \n int idx = value * get_global_size(0) + get_global_id(0); \n histBuffer[ idx ]++; -\n +\n \n } -\n +\n \n } // kernel_HistogramRectAllChannels_Grey ) @@ -1001,35 +1001,35 @@ void kernel_HistogramRectOneChannelReduction( KERNEL( // unused - // each work group (x256) handles a histogram bin + // each work group (x256) handles a histogram bin \n __attribute__((reqd_work_group_size(256, 1, 1))) \n __kernel \n void kernel_HistogramRectAllChannelsReduction_Grey( \n int n, // pixel redundancy that needs to be accumulated \n __global uint *histBuffer, \n __global uint* histResult) { // each wg accumulates 1 bin -\n +\n \n /* declare variables */ -\n +\n \n // work indices \n size_t groupId = get_group_id(0); \n size_t localId = 
get_local_id(0); // 0 -> 256-1 \n size_t globalId = get_global_id(0); // 0 -> 8*10*256-1=20480-1 \n uint numThreads = get_global_size(0); \n unsigned int hist = 0; -\n +\n \n /* accumulate in global memory */ \n for ( uint p = 0; p < n; p+=GROUP_SIZE) { \n hist += histBuffer[ (get_group_id(0)*n + p)]; \n } -\n +\n \n /* reduction in local memory */ \n // populate local memory \n __local unsigned int localHist[GROUP_SIZE]; \n localHist[localId] = hist; \n barrier(CLK_LOCAL_MEM_FENCE); -\n +\n \n for (int stride = GROUP_SIZE/2; stride >= 1; stride /= 2) { \n if (localId < stride) { \n hist = localHist[ (localId+stride)]; @@ -1040,10 +1040,10 @@ KERNEL( \n } \n barrier(CLK_LOCAL_MEM_FENCE); \n } -\n +\n \n if (localId == 0) \n histResult[get_group_id(0)] = localHist[0]; -\n +\n \n } // kernel_HistogramRectAllChannelsReduction_Grey ) @@ -1155,10 +1155,10 @@ void kernel_ThresholdRectToPix_OneChan( // for each pixel in burst for ( int p = 0; p < PIXELS_PER_BURST; p++) { - + //int littleEndianIdx = p ^ 3; //int bigEndianIdx = p; - int idx = + int idx = \n#ifdef __ENDIAN_LITTLE__\n p ^ 3; \n#else\n @@ -1179,30 +1179,30 @@ void kernel_ThresholdRectToPix_OneChan( KERNEL( -\n#define RED_SHIFT 24\n -\n#define GREEN_SHIFT 16\n -\n#define BLUE_SHIFT 8\n +\n#define RED_SHIFT 24\n +\n#define GREEN_SHIFT 16\n +\n#define BLUE_SHIFT 8\n \n#define SET_DATA_BYTE( pdata, n, val ) (*(l_uint8 *)((l_uintptr_t)((l_uint8 *)(pdata) + (n)) ^ 3) = (val))\n \n \n__attribute__((reqd_work_group_size(256, 1, 1)))\n \n__kernel\n \nvoid kernel_RGBToGray( __global const unsigned int *srcData, - __global unsigned char *dstData, + __global unsigned char *dstData, int srcWPL, int dstWPL, int height, int width, - float rwt, - float gwt, - float bwt ) { - + float rwt, + float gwt, + float bwt ) { + // pixel index int pixelIdx = get_global_id(0); if (pixelIdx >= height*width) return; - unsigned int word = srcData[pixelIdx]; - int output = (rwt * ((word >> RED_SHIFT) & 0xff) + + unsigned int word = 
srcData[pixelIdx]; + int output = (rwt * ((word >> RED_SHIFT) & 0xff) + gwt * ((word >> GREEN_SHIFT) & 0xff) + bwt * ((word >> BLUE_SHIFT) & 0xff) + 0.5); // SET_DATA_BYTE From 4ade2752fc657d94c4e1ab704e43db92a8385ed8 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 25 Nov 2016 17:23:48 +0100 Subject: [PATCH 056/132] opencl: Format OpenCL kernel code * Remove some empty lines to get a more uniform code * Fix #endif handling at end of file Signed-off-by: Stefan Weil --- opencl/oclkernels.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/opencl/oclkernels.h b/opencl/oclkernels.h index 39de265573..b353d0d870 100644 --- a/opencl/oclkernels.h +++ b/opencl/oclkernels.h @@ -1045,7 +1045,6 @@ KERNEL( \n histResult[get_group_id(0)] = localHist[0]; \n \n } // kernel_HistogramRectAllChannelsReduction_Grey - ) // ThresholdRectToPix Kernel @@ -1174,10 +1173,8 @@ void kernel_ThresholdRectToPix_OneChan( pix[w] = word; } } - ) - KERNEL( \n#define RED_SHIFT 24\n \n#define GREEN_SHIFT 16\n @@ -1209,10 +1206,9 @@ KERNEL( dstData[pixelIdx] = output; } ) -#endif ; // close char* #endif // USE_EXTERNAL_KERNEL -//#endif //_OCL_KERNEL_H_ +#endif //_OCL_KERNEL_H_ /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ From 892c007685d6970e7c5807f03e3ea53706338c3c Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 25 Nov 2016 17:31:41 +0100 Subject: [PATCH 057/132] opencl: Fix OpenCL kernel code assertion for newer versions With Debian package beignet-opencl-icd 1.2.1-1, Tesseract + OpenCL fails: [DS] Profile file not available (tesseract_opencl_profile_devices.dat); performing profiling. [DS] Device: "Intel(R) HD Graphics IvyBridge M GT2" (OpenCL) evaluation... 
ASSERTION FAILED: sel.hasDoubleType() at file /home/geier/beignet/backend/src/backend/gen_insn_selection.cpp, function void gbe::ConvertInstructionPattern::convertDoubleToSmallInts(gbe::Selection::Opaque&, const gbe::ir::ConvertInstruction&, bool&) const, line 5269 Trace/breakpoint trap Using a pure float expression (instead of double) fixes this issue. Signed-off-by: Stefan Weil --- opencl/oclkernels.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opencl/oclkernels.h b/opencl/oclkernels.h index b353d0d870..010ca661b8 100644 --- a/opencl/oclkernels.h +++ b/opencl/oclkernels.h @@ -1201,7 +1201,7 @@ KERNEL( unsigned int word = srcData[pixelIdx]; int output = (rwt * ((word >> RED_SHIFT) & 0xff) + gwt * ((word >> GREEN_SHIFT) & 0xff) + - bwt * ((word >> BLUE_SHIFT) & 0xff) + 0.5); + bwt * ((word >> BLUE_SHIFT) & 0xff) + 0.5f); // SET_DATA_BYTE dstData[pixelIdx] = output; } From c73f21aa6f3761ea89c55b095a69ec8b858272cb Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 13 Nov 2016 14:50:13 +0100 Subject: [PATCH 058/132] Simplify calls of free It is not necessary to check for null pointers. 
Signed-off-by: Stefan Weil --- opencl/opencl_device_selection.h | 15 ++++++--------- opencl/openclwrapper.cpp | 29 +++++++++-------------------- training/commontraining.cpp | 2 +- 3 files changed, 16 insertions(+), 30 deletions(-) diff --git a/opencl/opencl_device_selection.h b/opencl/opencl_device_selection.h index 238250cb70..5161125683 100644 --- a/opencl/opencl_device_selection.h +++ b/opencl/opencl_device_selection.h @@ -72,10 +72,8 @@ static ds_status releaseDSProfile(ds_profile* profile, ds_score_release sr) { if (profile->devices!=NULL && sr!=NULL) { unsigned int i; for (i = 0; i < profile->numDevices; i++) { - if (profile->devices[i].oclDeviceName) - free(profile->devices[i].oclDeviceName); - if (profile->devices[i].oclDriverVersion) - free(profile->devices[i].oclDriverVersion); + free(profile->devices[i].oclDeviceName); + free(profile->devices[i].oclDriverVersion); status = sr(profile->devices[i].score); if (status != DS_SUCCESS) break; @@ -171,15 +169,14 @@ static ds_status initDSProfile(ds_profile** p, const char* version) { profile->version = version; cleanup: - if (platforms) free(platforms); - if (devices) free(devices); + free(platforms); + free(devices); if (status == DS_SUCCESS) { *p = profile; } else { if (profile) { - if (profile->devices) - free(profile->devices); + free(profile->devices); free(profile); } } @@ -585,7 +582,7 @@ static ds_status readProfileFromFile(ds_profile* profile, } } cleanup: - if (contentStart!=NULL) free(contentStart); + free(contentStart); return status; } diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index 2a0420fb7c..7ca00f35a6 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -538,30 +538,19 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c // Release all resouces and memory for ( i = 0; i < numDevices; i++ ) { - if ( binaries[i] != NULL ) - { - free( binaries[i] ); - binaries[i] = NULL; - } + free(binaries[i]); + binaries[i] = NULL; } - if ( 
binaries != NULL ) - { - free( binaries ); - binaries = NULL; - } + free( binaries ); + binaries = NULL; - if ( binarySizes != NULL ) - { - free( binarySizes ); - binarySizes = NULL; - } + free(binarySizes); + binarySizes = NULL; - if ( mpArryDevsID != NULL ) - { - free( mpArryDevsID ); - mpArryDevsID = NULL; - } + free(mpArryDevsID); + mpArryDevsID = NULL; + return 1; } diff --git a/training/commontraining.cpp b/training/commontraining.cpp index f77a553de4..f415198bcf 100644 --- a/training/commontraining.cpp +++ b/training/commontraining.cpp @@ -524,7 +524,7 @@ CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, } CharID++; } - if ( Sample != NULL ) free( Sample ); + free( Sample ); return( Clusterer ); } /* SetUpForClustering */ From 743eb8104a15501e0af29711592ce6baa1470b62 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 23 Nov 2016 07:38:14 +0100 Subject: [PATCH 059/132] Simplify delete operations It is not necessary to check for null pointers. Signed-off-by: Stefan Weil --- api/baseapi.cpp | 3 +-- api/baseapi.h | 3 +-- ccmain/pageiterator.cpp | 8 +++----- ccmain/pgedit.cpp | 2 +- ccstruct/pdblock.h | 2 +- ccutil/hashfn.h | 2 +- classify/trainingsample.cpp | 4 ++-- classify/trainingsampleset.cpp | 6 ++---- cube/char_samp_set.cpp | 4 +--- cube/search_column.cpp | 4 +--- dict/dict.cpp | 8 +++----- textord/fpchop.cpp | 4 ++-- textord/makerow.cpp | 3 +-- wordrec/chopper.cpp | 8 ++------ wordrec/language_model.cpp | 2 +- 15 files changed, 23 insertions(+), 40 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 55ee8e3df1..bd4668afc9 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -831,8 +831,7 @@ int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { return -1; if (FindLines() != 0) return -1; - if (page_res_ != NULL) - delete page_res_; + delete page_res_; if (block_list_->empty()) { page_res_ = new PAGE_RES(false, block_list_, &tesseract_->prev_word_best_choice_); diff --git a/api/baseapi.h b/api/baseapi.h index 
ffe170b531..a4f88b67fa 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -373,8 +373,7 @@ class TESS_API TessBaseAPI { * delete it when it it is replaced or the API is destructed. */ void SetThresholder(ImageThresholder* thresholder) { - if (thresholder_ != NULL) - delete thresholder_; + delete thresholder_; thresholder_ = thresholder; ClearResults(); } diff --git a/ccmain/pageiterator.cpp b/ccmain/pageiterator.cpp index fc15840c44..7d7865ae30 100644 --- a/ccmain/pageiterator.cpp +++ b/ccmain/pageiterator.cpp @@ -87,7 +87,7 @@ const PageIterator& PageIterator::operator=(const PageIterator& src) { rect_top_ = src.rect_top_; rect_width_ = src.rect_width_; rect_height_ = src.rect_height_; - if (it_ != NULL) delete it_; + delete it_; it_ = new PAGE_RES_IT(*src.it_); BeginWord(src.blob_index_); return *this; @@ -597,10 +597,8 @@ void PageIterator::BeginWord(int offset) { } word_ = NULL; // We will be iterating the box_word. - if (cblob_it_ != NULL) { - delete cblob_it_; - cblob_it_ = NULL; - } + delete cblob_it_; + cblob_it_ = NULL; } else { // No recognition yet, so a "symbol" is a cblob. 
word_ = word_res->word; diff --git a/ccmain/pgedit.cpp b/ccmain/pgedit.cpp index d78c0dacc0..5e23595422 100644 --- a/ccmain/pgedit.cpp +++ b/ccmain/pgedit.cpp @@ -191,7 +191,7 @@ ScrollView* bln_word_window_handle() { // return handle */ void build_image_window(int width, int height) { - if (image_win != NULL) { delete image_win; } + delete image_win; image_win = new ScrollView(editor_image_win_name.string(), editor_image_xpos, editor_image_ypos, width + 1, diff --git a/ccstruct/pdblock.h b/ccstruct/pdblock.h index e9139f2ac5..1edd9aff50 100644 --- a/ccstruct/pdblock.h +++ b/ccstruct/pdblock.h @@ -51,7 +51,7 @@ class PDBLK { /// destructor ~PDBLK() { - if (hand_poly) delete hand_poly; + delete hand_poly; } POLY_BLOCK *poly_block() const { return hand_poly; } diff --git a/ccutil/hashfn.h b/ccutil/hashfn.h index ec96932107..73e15be9a8 100644 --- a/ccutil/hashfn.h +++ b/ccutil/hashfn.h @@ -63,7 +63,7 @@ template class SmartPtr { return ptr_; } void reset(T* ptr) { - if (ptr_ != NULL) delete ptr_; + delete ptr_; ptr_ = ptr; } bool operator==(const T* ptr) const { diff --git a/classify/trainingsample.cpp b/classify/trainingsample.cpp index 7fe83b7718..a88eb98bf0 100644 --- a/classify/trainingsample.cpp +++ b/classify/trainingsample.cpp @@ -209,7 +209,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type, int geo_type, CHAR_DESC_STRUCT* char_desc) { // Extract the INT features. - if (features_ != NULL) delete [] features_; + delete [] features_; FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type]; if (char_features == NULL) { tprintf("Error: no features to train on of type %s\n", @@ -230,7 +230,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type, } } // Extract the Micro features. 
- if (micro_features_ != NULL) delete [] micro_features_; + delete [] micro_features_; char_features = char_desc->FeatureSets[micro_type]; if (char_features == NULL) { tprintf("Error: no features to train on of type %s\n", diff --git a/classify/trainingsampleset.cpp b/classify/trainingsampleset.cpp index afbf3f420e..93936fcae6 100644 --- a/classify/trainingsampleset.cpp +++ b/classify/trainingsampleset.cpp @@ -96,10 +96,8 @@ bool TrainingSampleSet::DeSerialize(bool swap, FILE* fp) { num_raw_samples_ = samples_.size(); if (!unicharset_.load_from_file(fp)) return false; if (!font_id_map_.DeSerialize(swap, fp)) return false; - if (font_class_array_ != NULL) { - delete font_class_array_; - font_class_array_ = NULL; - } + delete font_class_array_; + font_class_array_ = NULL; inT8 not_null; if (fread(&not_null, sizeof(not_null), 1, fp) != 1) return false; if (not_null) { diff --git a/cube/char_samp_set.cpp b/cube/char_samp_set.cpp index 2a495095ef..1e212b1957 100644 --- a/cube/char_samp_set.cpp +++ b/cube/char_samp_set.cpp @@ -40,9 +40,7 @@ void CharSampSet::Cleanup() { // only free samples if owned by class if (own_samples_ == true) { for (int samp_idx = 0; samp_idx < cnt_; samp_idx++) { - if (samp_buff_[samp_idx] != NULL) { - delete samp_buff_[samp_idx]; - } + delete samp_buff_[samp_idx]; } } delete []samp_buff_; diff --git a/cube/search_column.cpp b/cube/search_column.cpp index 9a042d016a..e13149d9f5 100644 --- a/cube/search_column.cpp +++ b/cube/search_column.cpp @@ -195,9 +195,7 @@ SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost, } // free the edge - if (edge != NULL) { - delete edge; - } + delete edge; } // update Min and Max Costs diff --git a/dict/dict.cpp b/dict/dict.cpp index 9d9eaf12a3..0dcc7d6c6e 100644 --- a/dict/dict.cpp +++ b/dict/dict.cpp @@ -191,7 +191,7 @@ Dict::Dict(CCUtil *ccutil) Dict::~Dict() { End(); - if (hyphen_word_ != NULL) delete hyphen_word_; + delete hyphen_word_; if (output_ambig_words_file_ != NULL) 
fclose(output_ambig_words_file_); } @@ -340,10 +340,8 @@ void Dict::End() { dawgs_.clear(); successors_.clear(); document_words_ = NULL; - if (pending_words_ != NULL) { - delete pending_words_; - pending_words_ = NULL; - } + delete pending_words_; + pending_words_ = NULL; } // Returns true if in light of the current state unichar_id is allowed diff --git a/textord/fpchop.cpp b/textord/fpchop.cpp index be2768cce9..699d419620 100644 --- a/textord/fpchop.cpp +++ b/textord/fpchop.cpp @@ -259,8 +259,8 @@ void split_to_blob( //split the blob pitch_error, left_coutlines, right_coutlines); - if (blob != NULL) - delete blob; //free it + + delete blob; } /********************************************************************** diff --git a/textord/makerow.cpp b/textord/makerow.cpp index c8170e5d16..a5749ad680 100644 --- a/textord/makerow.cpp +++ b/textord/makerow.cpp @@ -507,8 +507,7 @@ void vigorous_noise_removal(TO_BLOCK* block) { continue; // Looks OK. } // It might be noise so get rid of it. - if (blob->cblob() != NULL) - delete blob->cblob(); + delete blob->cblob(); delete b_it.extract(); } else { prev = blob; diff --git a/wordrec/chopper.cpp b/wordrec/chopper.cpp index 850cfcabda..dfda3e9183 100644 --- a/wordrec/chopper.cpp +++ b/wordrec/chopper.cpp @@ -568,9 +568,7 @@ int Wordrec::select_blob_to_split( for (x = 0; x < blob_choices.size(); ++x) { if (blob_choices[x] == NULL) { - if (fragments != NULL) { - delete[] fragments; - } + delete[] fragments; return x; } else { blob_choice = blob_choices[x]; @@ -614,9 +612,7 @@ int Wordrec::select_blob_to_split( } } } - if (fragments != NULL) { - delete[] fragments; - } + delete[] fragments; // TODO(daria): maybe a threshold of badness for // worst_near_fragment would be useful. return worst_index_near_fragment != -1 ? 
diff --git a/wordrec/language_model.cpp b/wordrec/language_model.cpp index 361fb5c585..99710478ed 100644 --- a/wordrec/language_model.cpp +++ b/wordrec/language_model.cpp @@ -988,7 +988,7 @@ float LanguageModel::ComputeNgramCost(const char *unichar, unichar, context_ptr, CertaintyScore(certainty)/denom, prob, ngram_and_classifier_cost); } - if (modified_context != NULL) delete[] modified_context; + delete[] modified_context; return ngram_and_classifier_cost; } From 2940b44bcda4bd707608b7d7b5f66e5af48d7b8c Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Mon, 28 Nov 2016 08:55:03 -0800 Subject: [PATCH 060/132] Missing pdf font file from previous sync From e9be858503c93a72554832367a772c10df54f3ab Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Mon, 28 Nov 2016 09:39:17 -0800 Subject: [PATCH 061/132] Fixed the memory leak/double free cleanly --- classify/adaptmatch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classify/adaptmatch.cpp b/classify/adaptmatch.cpp index 5ea8a49ce4..e4713a04f5 100644 --- a/classify/adaptmatch.cpp +++ b/classify/adaptmatch.cpp @@ -819,7 +819,7 @@ int Classify::GetAdaptiveFeatures(TBLOB *Blob, Features = ExtractPicoFeatures(Blob); NumFeatures = Features->NumFeatures; - if (NumFeatures > UNLIKELY_NUM_FEAT) { + if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) { FreeFeatureSet(Features); return 0; } From f9253c197e938d772ee866c063019a34c5a4d86a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 7 Dec 2016 16:21:52 +0100 Subject: [PATCH 062/132] Fix build for Mingw-w64 (120a5dbdab78) and non C++11 build (VS2010) --- ccstruct/imagedata.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/ccstruct/imagedata.cpp b/ccstruct/imagedata.cpp index 11e1c8629f..eb1eab7f07 100644 --- a/ccstruct/imagedata.cpp +++ b/ccstruct/imagedata.cpp @@ -30,8 +30,10 @@ #include "helpers.h" #include "tprintf.h" -#if __cplusplus > 199711L // C++11 support - #include +#if 
defined(__MINGW32__) +# include +##elif __cplusplus <= 199711L // in C++11 +# include #endif // Number of documents to read ahead while training. Doesn't need to be very @@ -451,9 +453,12 @@ const ImageData* DocumentData::GetPage(int index) { if (needs_loading) LoadPageInBackground(index); // We can't directly load the page, or the background load will delete it // while the caller is using it, so give it a chance to work. -#if __cplusplus > 199711L // C++11 support - //TODO: We need to fix this for compilers without C++11 support (e.g. VS2010) - std::this_thread::sleep_for(std::chrono::seconds(1)); +#if __cplusplus > 199711L + std::this_thread::sleep_for(std::chrono::seconds(1)); +#elif _WIN32 // MSVS + Sleep(1000); +#else + sleep(1); #endif } return page; From ca6e64e2c98adb429a484619a531b6d8ca5ef514 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Tue, 29 Nov 2016 14:36:37 +0100 Subject: [PATCH 063/132] mingw-w64: Fix compiler warnings caused by macro redefinition GNU compiler report (cross build for Windows on Debian): In file included from ../ccutil/host.h:63:0, from ../arch/dotproductsse.h:22, from ../arch/dotproductsse.cpp:43: ../ccutil/platform.h:27:0: warning: "NOMINMAX" redefined #define NOMINMAX In file included from /usr/lib/gcc/i686-w64-mingw32/6.1-win32/include/c++/i686-w64-mingw32/bits/c++config.h:495:0, from /usr/lib/gcc/i686-w64-mingw32/6.1-win32/include/c++/cstdlib:41, from /usr/lib/gcc/i686-w64-mingw32/6.1-win32/include/c++/stdlib.h:36, from /usr/lib/gcc/i686-w64-mingw32/6.1-win32/include/mm_malloc.h:27, from /usr/lib/gcc/i686-w64-mingw32/6.1-win32/include/xmmintrin.h:34, from /usr/lib/gcc/i686-w64-mingw32/6.1-win32/include/emmintrin.h:31, from ../arch/dotproductsse.cpp:40: /usr/lib/gcc/i686-w64-mingw32/6.1-win32/include/c++/i686-w64-mingw32/bits/os_defines.h:45:0: note: this is the location of the previous definition #define NOMINMAX 1 Signed-off-by: Stefan Weil --- ccutil/platform.h | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/ccutil/platform.h b/ccutil/platform.h index d60a45b601..dd9be87ac6 100644 --- a/ccutil/platform.h +++ b/ccutil/platform.h @@ -24,7 +24,9 @@ #define DLLSYM #ifdef _WIN32 +#ifndef NOMINMAX #define NOMINMAX +#endif /* NOMINMAX */ #define WIN32_LEAN_AND_MEAN #ifdef __GNUC__ #define ultoa _ultoa From a74c6aa27d3dd339874b3a3e762c3e3d981cc7ed Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 28 Nov 2016 15:48:36 +0100 Subject: [PATCH 064/132] cube: Simplify delete operations It is not necessary to check for null pointers. Remove also unneeded delete operations and add missing delete operations in cube/bmp_8.cpp. Simplify also a conditional statement in cube/cube_object.cpp. Signed-off-by: Stefan Weil --- ccmain/cube_reco_context.cpp | 42 ++++++++++++---------------------- cube/beam_search.cpp | 6 ++--- cube/bmp_8.cpp | 12 ++++------ cube/classifier_base.h | 16 ++++--------- cube/cube_object.cpp | 44 ++++++++++++------------------------ 5 files changed, 40 insertions(+), 80 deletions(-) diff --git a/ccmain/cube_reco_context.cpp b/ccmain/cube_reco_context.cpp index fed53f0954..9acf1dfc64 100644 --- a/ccmain/cube_reco_context.cpp +++ b/ccmain/cube_reco_context.cpp @@ -55,40 +55,26 @@ CubeRecoContext::CubeRecoContext(Tesseract *tess_obj) { } CubeRecoContext::~CubeRecoContext() { - if (char_classifier_ != NULL) { - delete char_classifier_; - char_classifier_ = NULL; - } + delete char_classifier_; + char_classifier_ = NULL; - if (word_size_model_ != NULL) { - delete word_size_model_; - word_size_model_ = NULL; - } + delete word_size_model_; + word_size_model_ = NULL; - if (char_set_ != NULL) { - delete char_set_; - char_set_ = NULL; - } + delete char_set_; + char_set_ = NULL; - if (char_bigrams_ != NULL) { - delete char_bigrams_; - char_bigrams_ = NULL; - } + delete char_bigrams_; + char_bigrams_ = NULL; - if (word_unigrams_ != NULL) { - delete word_unigrams_; - word_unigrams_ = NULL; - } + delete word_unigrams_; + word_unigrams_ = NULL; - if (lang_mod_ != NULL) { - 
delete lang_mod_; - lang_mod_ = NULL; - } + delete lang_mod_; + lang_mod_ = NULL; - if (params_ != NULL) { - delete params_; - params_ = NULL; - } + delete params_; + params_ = NULL; } /** diff --git a/cube/beam_search.cpp b/cube/beam_search.cpp index fd17a1d59f..da43f8c877 100644 --- a/cube/beam_search.cpp +++ b/cube/beam_search.cpp @@ -36,8 +36,7 @@ BeamSearch::BeamSearch(CubeRecoContext *cntxt, bool word_mode) { void BeamSearch::Cleanup() { if (col_ != NULL) { for (int col = 0; col < col_cnt_; col++) { - if (col_[col]) - delete col_[col]; + delete col_[col]; } delete []col_; } @@ -356,8 +355,7 @@ CharSamp **BeamSearch::BackTrack(SearchObject *srch_obj, SearchNode *srch_node, return NULL; if (str32) { - if (*str32) - delete [](*str32); // clear existing value + delete [](*str32); // clear existing value *str32 = srch_node->PathString(); if (!*str32) return NULL; diff --git a/cube/bmp_8.cpp b/cube/bmp_8.cpp index f7b6e0a1b8..cd09c70754 100644 --- a/cube/bmp_8.cpp +++ b/cube/bmp_8.cpp @@ -48,18 +48,14 @@ Bmp8::~Bmp8() { // free buffer void Bmp8::FreeBmpBuffer(unsigned char **buff) { if (buff != NULL) { - if (buff[0] != NULL) { - delete []buff[0]; - } + delete []buff[0]; delete []buff; } } void Bmp8::FreeBmpBuffer(unsigned int **buff) { if (buff != NULL) { - if (buff[0] != NULL) { - delete []buff[0]; - } + delete []buff[0]; delete []buff; } } @@ -77,7 +73,6 @@ unsigned char **Bmp8::CreateBmpBuffer(unsigned char init_val) { buff = (unsigned char **) new unsigned char *[hgt_ * sizeof(*buff)]; if (!buff) { - delete []buff; return NULL; } @@ -85,6 +80,7 @@ unsigned char **Bmp8::CreateBmpBuffer(unsigned char init_val) { buff[0] = (unsigned char *) new unsigned char[stride_ * hgt_ * sizeof(*buff[0])]; if (!buff[0]) { + delete []buff; return NULL; } @@ -105,13 +101,13 @@ unsigned int ** Bmp8::CreateBmpBuffer(int wid, int hgt, // compute stride (align on 4 byte boundries) buff = (unsigned int **) new unsigned int *[hgt * sizeof(*buff)]; if (!buff) { - delete []buff; return 
NULL; } // alloc and init memory for buffer and line buffer buff[0] = (unsigned int *) new unsigned int[wid * hgt * sizeof(*buff[0])]; if (!buff[0]) { + delete []buff; return NULL; } diff --git a/cube/classifier_base.h b/cube/classifier_base.h index 8c2b1bbf9a..98f2f79af4 100644 --- a/cube/classifier_base.h +++ b/cube/classifier_base.h @@ -49,21 +49,15 @@ class CharClassifier { virtual ~CharClassifier() { if (fold_sets_ != NULL) { for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) { - if (fold_sets_[fold_set] != NULL) { - delete []fold_sets_[fold_set]; - } + delete []fold_sets_[fold_set]; } delete []fold_sets_; fold_sets_ = NULL; } - if (fold_set_len_ != NULL) { - delete []fold_set_len_; - fold_set_len_ = NULL; - } - if (feat_extract_ != NULL) { - delete feat_extract_; - feat_extract_ = NULL; - } + delete []fold_set_len_; + fold_set_len_ = NULL; + delete feat_extract_; + feat_extract_ = NULL; } // pure virtual functions that need to be implemented by any inheriting class diff --git a/cube/cube_object.cpp b/cube/cube_object.cpp index c7dec4d5b8..ca66216e38 100644 --- a/cube/cube_object.cpp +++ b/cube/cube_object.cpp @@ -54,47 +54,33 @@ void CubeObject::Init() { // Cleanup function void CubeObject::Cleanup() { - if (alt_list_ != NULL) { - delete alt_list_; - alt_list_ = NULL; - } + delete alt_list_; + alt_list_ = NULL; - if (deslanted_alt_list_ != NULL) { - delete deslanted_alt_list_; - deslanted_alt_list_ = NULL; - } + delete deslanted_alt_list_; + deslanted_alt_list_ = NULL; } CubeObject::~CubeObject() { - if (char_samp_ != NULL && own_char_samp_ == true) { + if (own_char_samp_ == true) { delete char_samp_; char_samp_ = NULL; } - if (srch_obj_ != NULL) { - delete srch_obj_; - srch_obj_ = NULL; - } + delete srch_obj_; + srch_obj_ = NULL; - if (deslanted_srch_obj_ != NULL) { - delete deslanted_srch_obj_; - deslanted_srch_obj_ = NULL; - } + delete deslanted_srch_obj_; + deslanted_srch_obj_ = NULL; - if (beam_obj_ != NULL) { - delete beam_obj_; - beam_obj_ = 
NULL; - } + delete beam_obj_; + beam_obj_ = NULL; - if (deslanted_beam_obj_ != NULL) { - delete deslanted_beam_obj_; - deslanted_beam_obj_ = NULL; - } + delete deslanted_beam_obj_; + deslanted_beam_obj_ = NULL; - if (deslanted_char_samp_ != NULL) { - delete deslanted_char_samp_; - deslanted_char_samp_ = NULL; - } + delete deslanted_char_samp_; + deslanted_char_samp_ = NULL; Cleanup(); } From a2fa9cab3694ecd36949c6f0126908aed2c1c7dc Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 28 Nov 2016 15:06:41 +0100 Subject: [PATCH 065/132] cube/char_bigrams: Fix some memory leaks Coverity report: CID 1164717 (#1 of 1): Resource leak (RESOURCE_LEAK) 10. leaked_storage: Variable upper_32 going out of scope leaks the storage it points to. CID 1164718 (#1 of 1): Resource leak (RESOURCE_LEAK) 10. leaked_storage: Variable lower_32 going out of scope leaks the storage it points to. Signed-off-by: Stefan Weil --- cube/char_bigrams.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cube/char_bigrams.cpp b/cube/char_bigrams.cpp index b36b1f6cda..cda869f42b 100644 --- a/cube/char_bigrams.cpp +++ b/cube/char_bigrams.cpp @@ -179,14 +179,14 @@ int CharBigrams::Cost(const char_32 *char_32_ptr, CharSet *char_set) const { if (lower_32 && lower_32[0] != 0) { int cost_lower = MeanCostWithSpaces(lower_32); cost = MIN(cost, cost_lower); - delete [] lower_32; } + delete [] lower_32; char_32 *upper_32 = CubeUtils::ToUpper(char_32_ptr, char_set); if (upper_32 && upper_32[0] != 0) { int cost_upper = MeanCostWithSpaces(upper_32); cost = MIN(cost, cost_upper); - delete [] upper_32; } + delete [] upper_32; } return cost; } From 73c0649d98130cc5ec8db3cd0657a8e835fdb0e0 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 28 Nov 2016 15:09:06 +0100 Subject: [PATCH 066/132] cube/char_samp: Fix some memory leaks Coverity report: CID 1164722 (#9 of 9): Resource leak (RESOURCE_LEAK) 20. leaked_storage: Variable label32 going out of scope leaks the storage it points to. 
Signed-off-by: Stefan Weil --- cube/char_samp.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/cube/char_samp.cpp b/cube/char_samp.cpp index c3493fa13c..4c4059d6bc 100644 --- a/cube/char_samp.cpp +++ b/cube/char_samp.cpp @@ -112,6 +112,7 @@ CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) { // read label if (fp->Read(label32, val32 * sizeof(*label32)) != (val32 * sizeof(*label32))) { + delete [] label32; return NULL; } // null terminate @@ -121,33 +122,42 @@ CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) { } // read coordinates if (fp->Read(&page, sizeof(page)) != sizeof(page)) { + delete [] label32; return NULL; } if (fp->Read(&left, sizeof(left)) != sizeof(left)) { + delete [] label32; return NULL; } if (fp->Read(&top, sizeof(top)) != sizeof(top)) { + delete [] label32; return NULL; } if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) { + delete [] label32; return NULL; } if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) { + delete [] label32; return NULL; } if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) { + delete [] label32; return NULL; } if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) { + delete [] label32; return NULL; } if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) != sizeof(norm_aspect_ratio)) { + delete [] label32; return NULL; } // create the object CharSamp *char_samp = new CharSamp(); if (char_samp == NULL) { + delete [] label32; return NULL; } // init @@ -163,6 +173,7 @@ CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) { // load the Bmp8 part if (char_samp->LoadFromCharDumpFile(fp) == false) { delete char_samp; + delete [] label32; return NULL; } return char_samp; From b169ef5d345b6a839c6cfe3afb4fcf61b3106d29 Mon Sep 17 00:00:00 2001 From: Morten Espersen Date: Wed, 30 Nov 2016 12:00:23 +0100 Subject: [PATCH 067/132] Correcting link for 3rd party wiki pages --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/README.md b/README.md index fd64016d78..cc9f3d69ae 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ languages "out of the box". It can be trained to recognize other languages. See Tesseract supports various output formats: plain-text, hocr(html), pdf. -This project does not include a GUI application. If you need one, please see the [3rdParty](https://github.com/tesseract-ocr/tesseract/wiki/3rdParty) wiki page. +This project does not include a GUI application. If you need one, please see the [3rdParty](https://github.com/tesseract-ocr/tesseract/wiki/User-Projects-%E2%80%93-3rdParty) wiki page. You should note that in many cases, in order to get better OCR results, you'll need to [improve the quality](https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality) of the image you are giving Tesseract. From 82529e31dd8ff7f536585267bf750bfaa8b93b64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 7 Dec 2016 16:58:25 +0100 Subject: [PATCH 068/132] Formatting changes from clang_tidy on latest pull # Conflicts: # ccutil/host.h --- ccstruct/pdblock.h | 4 +--- ccutil/ambigs.h | 3 +-- classify/trainingsample.cpp | 4 ++-- dict/dict.h | 10 +++++----- opencl/oclkernels.h | 4 ++-- opencl/openclwrapper.cpp | 10 +++++----- training/commontraining.cpp | 4 ++-- viewer/scrollview.cpp | 6 +++--- 8 files changed, 21 insertions(+), 24 deletions(-) diff --git a/ccstruct/pdblock.h b/ccstruct/pdblock.h index 1edd9aff50..cf29b782cb 100644 --- a/ccstruct/pdblock.h +++ b/ccstruct/pdblock.h @@ -50,9 +50,7 @@ class PDBLK { void set_sides(ICOORDELT_LIST *left, ICOORDELT_LIST *right); /// destructor - ~PDBLK() { - delete hand_poly; - } + ~PDBLK() { delete hand_poly; } POLY_BLOCK *poly_block() const { return hand_poly; } /// set the poly block diff --git a/ccutil/ambigs.h b/ccutil/ambigs.h index 02686035e9..faab21989b 100644 --- a/ccutil/ambigs.h +++ b/ccutil/ambigs.h @@ -59,8 +59,7 @@ class UnicharIdArrayUtils { // less than length of array2, if any array1[i] 
is less than array2[i]. // Returns 0 if the arrays are equal, 1 otherwise. // The function assumes that the arrays are terminated by INVALID_UNICHAR_ID. - static inline int compare(const UNICHAR_ID *ptr1, - const UNICHAR_ID *ptr2) { + static inline int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2) { for (;;) { const UNICHAR_ID val1 = *ptr1++; const UNICHAR_ID val2 = *ptr2++; diff --git a/classify/trainingsample.cpp b/classify/trainingsample.cpp index a88eb98bf0..ee6c9d7f83 100644 --- a/classify/trainingsample.cpp +++ b/classify/trainingsample.cpp @@ -209,7 +209,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type, int geo_type, CHAR_DESC_STRUCT* char_desc) { // Extract the INT features. - delete [] features_; + delete[] features_; FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type]; if (char_features == NULL) { tprintf("Error: no features to train on of type %s\n", @@ -230,7 +230,7 @@ void TrainingSample::ExtractCharDesc(int int_feature_type, } } // Extract the Micro features. 
- delete [] micro_features_; + delete[] micro_features_; char_features = char_desc->FeatureSets[micro_type]; if (char_features == NULL) { tprintf("Error: no features to train on of type %s\n", diff --git a/dict/dict.h b/dict/dict.h index 5ae203689f..31d653af9c 100644 --- a/dict/dict.h +++ b/dict/dict.h @@ -381,11 +381,11 @@ class Dict { double def_probability_in_context( const char* lang, const char* context, int context_bytes, const char* character, int character_bytes) { - (void) lang; - (void) context; - (void) context_bytes; - (void) character; - (void) character_bytes; + (void)lang; + (void)context; + (void)context_bytes; + (void)character; + (void)character_bytes; return 0.0; } double ngram_probability_in_context(const char* lang, diff --git a/opencl/oclkernels.h b/opencl/oclkernels.h index 010ca661b8..5a34fb7a22 100644 --- a/opencl/oclkernels.h +++ b/opencl/oclkernels.h @@ -1209,6 +1209,6 @@ KERNEL( ; // close char* -#endif // USE_EXTERNAL_KERNEL -#endif //_OCL_KERNEL_H_ +#endif // USE_EXTERNAL_KERNEL +#endif //_OCL_KERNEL_H_ /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index 7ca00f35a6..6554bc7035 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -538,11 +538,11 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c // Release all resouces and memory for ( i = 0; i < numDevices; i++ ) { - free(binaries[i]); - binaries[i] = NULL; + free(binaries[i]); + binaries[i] = NULL; } - free( binaries ); + free(binaries); binaries = NULL; free(binarySizes); @@ -550,7 +550,7 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c free(mpArryDevsID); mpArryDevsID = NULL; - + return 1; } @@ -1142,7 +1142,7 @@ OpenclDevice::pixReadMemTiffCl(const l_uint8 *data,size_t size,l_int32 n) } if (pagefound == FALSE) { - L_WARNING("tiff page %d not found", procName, i); + L_WARNING("tiff page %d not found\n", procName, i); 
TIFFCleanup(tif); return NULL; } diff --git a/training/commontraining.cpp b/training/commontraining.cpp index f415198bcf..57ecdbecef 100644 --- a/training/commontraining.cpp +++ b/training/commontraining.cpp @@ -524,8 +524,8 @@ CLUSTERER *SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, } CharID++; } - free( Sample ); - return( Clusterer ); + free(Sample); + return Clusterer; } /* SetUpForClustering */ diff --git a/viewer/scrollview.cpp b/viewer/scrollview.cpp index f10b789ea8..d4eb6d46a5 100644 --- a/viewer/scrollview.cpp +++ b/viewer/scrollview.cpp @@ -37,9 +37,9 @@ #include "scrollview.h" #ifdef _MSC_VER -#pragma warning(disable:4786) // Don't give irrelevant warnings for stl -#pragma warning(disable:4018) // signed/unsigned warnings -#pragma warning(disable:4530) // exception warnings +#pragma warning(disable : 4786) // Don't give irrelevant warnings for stl +#pragma warning(disable : 4018) // signed/unsigned warnings +#pragma warning(disable : 4530) // exception warnings #endif const int kSvPort = 8461; From 5307204f93887c5fcca93443714833cf66651500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?=
Date: Wed, 7 Dec 2016 17:04:42 +0100 Subject: [PATCH 069/132] Added std:: to vector --- training/boxchar.cpp | 25 ++++++++++++++----------- training/pango_font_info.h | 24 ++++++++++++------------ training/stringrenderer.h | 8 ++++---- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/training/boxchar.cpp b/training/boxchar.cpp index 4324597744..d3d5160895 100644 --- a/training/boxchar.cpp +++ b/training/boxchar.cpp @@ -49,7 +49,8 @@ void BoxChar::AddBox(int x, int y, int width, int height) { } /* static */ -void BoxChar::TranslateBoxes(int xshift, int yshift, vector* boxes) { +void BoxChar::TranslateBoxes(int xshift, int yshift, + std::vector* boxes) { for (int i = 0; i < boxes->size(); ++i) { BOX* box = (*boxes)[i]->box_; if (box != NULL) { @@ -62,7 +63,7 @@ void BoxChar::TranslateBoxes(int xshift, int yshift, vector* boxes) { // Prepares for writing the boxes to a file by inserting newlines, spaces, // and re-ordering so the boxes are strictly left-to-right. /* static */ -void BoxChar::PrepareToWrite(vector* boxes) { +void BoxChar::PrepareToWrite(std::vector* boxes) { bool rtl_rules = ContainsMostlyRTL(*boxes); bool vertical_rules = MostlyVertical(*boxes); InsertNewlines(rtl_rules, vertical_rules, boxes); @@ -78,7 +79,7 @@ void BoxChar::PrepareToWrite(vector* boxes) { // Inserts newline (tab) characters into the vector at newline positions. /* static */ void BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules, - vector* boxes) { + std::vector* boxes) { int prev_i = -1; int max_shift = 0; for (int i = 0; i < boxes->size(); ++i) { @@ -141,7 +142,7 @@ void BoxChar::InsertNewlines(bool rtl_rules, bool vertical_rules, // Converts NULL boxes to space characters, with appropriate bounding boxes. /* static */ void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules, - vector* boxes) { + std::vector* boxes) { // After InsertNewlines, any remaining null boxes are not newlines, and are // singletons, so add a box to each remaining null box. 
for (int i = 1; i + 1 < boxes->size(); ++i) { @@ -197,7 +198,7 @@ void BoxChar::InsertSpaces(bool rtl_rules, bool vertical_rules, // Reorders text in a right-to-left script in left-to-right order. /* static */ -void BoxChar::ReorderRTLText(vector* boxes) { +void BoxChar::ReorderRTLText(std::vector* boxes) { // After adding newlines and spaces, this task is simply a matter of sorting // by left each group of boxes between newlines. BoxCharPtrSort sorter; @@ -211,7 +212,7 @@ void BoxChar::ReorderRTLText(vector* boxes) { // Returns true if the vector contains mostly RTL characters. /* static */ -bool BoxChar::ContainsMostlyRTL(const vector& boxes) { +bool BoxChar::ContainsMostlyRTL(const std::vector& boxes) { int num_rtl = 0, num_ltr = 0; for (int i = 0; i < boxes.size(); ++i) { // Convert the unichar to UTF32 representation @@ -240,7 +241,7 @@ bool BoxChar::ContainsMostlyRTL(const vector& boxes) { // Returns true if the text is mostly laid out vertically. /* static */ -bool BoxChar::MostlyVertical(const vector& boxes) { +bool BoxChar::MostlyVertical(const std::vector& boxes) { inT64 total_dx = 0, total_dy = 0; for (int i = 1; i < boxes.size(); ++i) { if (boxes[i - 1]->box_ != NULL && boxes[i]->box_ != NULL && @@ -259,7 +260,7 @@ bool BoxChar::MostlyVertical(const vector& boxes) { // Returns the total length of all the strings in the boxes. /* static */ -int BoxChar::TotalByteLength(const vector& boxes) { +int BoxChar::TotalByteLength(const std::vector& boxes) { int total_length = 0; for (int i = 0; i < boxes.size(); ++i) total_length += boxes[i]->ch_.size(); return total_length; @@ -269,7 +270,8 @@ int BoxChar::TotalByteLength(const vector& boxes) { // The rotation is in radians clockwise about the given center. 
/* static */ void BoxChar::RotateBoxes(float rotation, int xcenter, int ycenter, - int start_box, int end_box, vector* boxes) { + int start_box, int end_box, + std::vector* boxes) { Boxa* orig = boxaCreate(0); for (int i = start_box; i < end_box; ++i) { BOX* box = (*boxes)[i]->box_; @@ -289,13 +291,14 @@ void BoxChar::RotateBoxes(float rotation, int xcenter, int ycenter, const int kMaxLineLength = 1024; /* static */ void BoxChar::WriteTesseractBoxFile(const string& filename, int height, - const vector& boxes) { + const std::vector& boxes) { string output = GetTesseractBoxStr(height, boxes); File::WriteStringToFileOrDie(output, filename); } /* static */ -string BoxChar::GetTesseractBoxStr(int height, const vector& boxes) { +string BoxChar::GetTesseractBoxStr(int height, + const std::vector& boxes) { string output; char buffer[kMaxLineLength]; for (int i = 0; i < boxes.size(); ++i) { diff --git a/training/pango_font_info.h b/training/pango_font_info.h index 09a43fab14..5c04bf0a9c 100644 --- a/training/pango_font_info.h +++ b/training/pango_font_info.h @@ -80,7 +80,7 @@ class PangoFontInfo { // If true, returns individual graphemes. Any whitespace characters in the // original string are also included in the list. bool CanRenderString(const char* utf8_word, int len, - vector* graphemes) const; + std::vector* graphemes) const; bool CanRenderString(const char* utf8_word, int len) const; // Retrieves the x_bearing and x_advance for the given utf8 character in the @@ -169,29 +169,29 @@ class FontUtils { // best_match is not NULL, the closest matching font is returned there. static bool IsAvailableFont(const char* font_desc, string* best_match); // Outputs description names of available fonts. - static const vector& ListAvailableFonts(); + static const std::vector& ListAvailableFonts(); // Picks font among available fonts that covers and can render the given word, // and returns the font description name and the decomposition of the word to // graphemes. 
Returns false if no suitable font was found. static bool SelectFont(const char* utf8_word, const int utf8_len, - string* font_name, vector* graphemes); + string* font_name, std::vector* graphemes); // Picks font among all_fonts that covers and can render the given word, // and returns the font description name and the decomposition of the word to // graphemes. Returns false if no suitable font was found. static bool SelectFont(const char* utf8_word, const int utf8_len, - const vector& all_fonts, - string* font_name, vector* graphemes); + const std::vector& all_fonts, + string* font_name, std::vector* graphemes); // Returns a bitmask where the value of true at index 'n' implies that unicode // value 'n' is renderable by at least one available font. - static void GetAllRenderableCharacters(vector* unichar_bitmap); + static void GetAllRenderableCharacters(std::vector* unichar_bitmap); // Variant of the above function that inspects only the provided font names. - static void GetAllRenderableCharacters(const vector& font_names, - vector* unichar_bitmap); + static void GetAllRenderableCharacters(const std::vector& font_names, + std::vector* unichar_bitmap); static void GetAllRenderableCharacters(const string& font_name, - vector* unichar_bitmap); + std::vector* unichar_bitmap); // NOTE: The following utilities were written to be backward compatible with // StringRender. @@ -204,7 +204,7 @@ class FontUtils { // The return string is a list of the acceptable fonts that were used. static string BestFonts( const TessHashMap& ch_map, - vector > >* font_flag); + std::vector > >* font_flag); // FontScore returns the weighted renderability score of the given // hash map character table in the given font. The unweighted score @@ -213,13 +213,13 @@ class FontUtils { // corresponding character (in order of iterating ch_map) can be rendered. 
static int FontScore(const TessHashMap& ch_map, const string& fontname, int* raw_score, - vector* ch_flags); + std::vector* ch_flags); // PangoFontInfo is reinitialized, so clear the static list of fonts. static void ReInit(); private: - static vector available_fonts_; // cache list + static std::vector available_fonts_; // cache list }; } // namespace tesseract diff --git a/training/stringrenderer.h b/training/stringrenderer.h index f0ba0c0b00..b4646f71b1 100644 --- a/training/stringrenderer.h +++ b/training/stringrenderer.h @@ -135,7 +135,7 @@ class StringRenderer { // Get the boxchars of all clusters rendered thus far (or since the last call // to ClearBoxes()). - const vector& GetBoxes() const; + const std::vector& GetBoxes() const; // Get the rendered page bounding boxes of all pages created thus far (or // since last call to ClearBoxes()). Boxa* GetPageBoxes() const; @@ -171,8 +171,8 @@ class StringRenderer { void SetWordUnderlineAttributes(const string& page_text); // Compute bounding boxes around grapheme clusters. void ComputeClusterBoxes(); - void CorrectBoxPositionsToLayout(vector* boxchars); - bool GetClusterStrings(vector* cluster_text); + void CorrectBoxPositionsToLayout(std::vector* boxchars); + bool GetClusterStrings(std::vector* cluster_text); int FindFirstPageBreakOffset(const char* text, int text_length); PangoFontInfo font_; @@ -204,7 +204,7 @@ class StringRenderer { int page_; // Boxes and associated text for all pages rendered with RenderToImage() since // the last call to ClearBoxes(). - vector boxchars_; + std::vector boxchars_; int box_padding_; // Bounding boxes for pages since the last call to ClearBoxes(). Boxa* page_boxes_; From 257d6e815673465b124524900fad321e3a021f6c Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 30 Nov 2016 20:23:02 +0100 Subject: [PATCH 070/132] cube: Simplify new operations It is not necessary to check for null pointers after new. Simplify also two delete operations which were missing in the previous commit. 
Signed-off-by: Stefan Weil --- ccmain/cube_control.cpp | 2 +- ccmain/cube_reco_context.cpp | 10 --- cube/beam_search.cpp | 15 ---- cube/bmp_8.cpp | 52 +------------ cube/cached_file.cpp | 3 - cube/char_altlist.cpp | 7 -- cube/char_bigrams.cpp | 16 ---- cube/char_samp.cpp | 39 ---------- cube/char_samp.h | 22 ++---- cube/char_samp_set.cpp | 10 --- cube/char_set.cpp | 18 ----- cube/classifier_factory.cpp | 12 --- cube/con_comp.cpp | 18 ----- cube/conv_net_classifier.cpp | 36 --------- cube/cube_line_object.cpp | 106 ++++++++++++-------------- cube/cube_line_segmenter.cpp | 6 -- cube/cube_object.cpp | 26 ------- cube/cube_search_object.cpp | 40 +--------- cube/cube_tuning_params.cpp | 5 -- cube/cube_utils.cpp | 14 ---- cube/hybrid_neural_net_classifier.cpp | 23 ------ cube/search_column.cpp | 10 --- cube/search_node.cpp | 3 - cube/tess_lang_mod_edge.cpp | 6 +- cube/tess_lang_model.cpp | 21 +---- cube/word_altlist.cpp | 8 -- cube/word_list_lang_model.cpp | 6 -- cube/word_size_model.cpp | 15 ---- cube/word_unigrams.cpp | 19 ----- 29 files changed, 65 insertions(+), 503 deletions(-) diff --git a/ccmain/cube_control.cpp b/ccmain/cube_control.cpp index b982289db9..8270e4f350 100644 --- a/ccmain/cube_control.cpp +++ b/ccmain/cube_control.cpp @@ -179,7 +179,7 @@ bool Tesseract::init_cube_objects(bool load_combiner, // Create the combiner object and load the combiner net for target languages. 
if (load_combiner) { tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_); - if (!tess_cube_combiner_ || !tess_cube_combiner_->LoadCombinerNet()) { + if (!tess_cube_combiner_->LoadCombinerNet()) { delete cube_cntxt_; cube_cntxt_ = NULL; if (tess_cube_combiner_ != NULL) { diff --git a/ccmain/cube_reco_context.cpp b/ccmain/cube_reco_context.cpp index 9acf1dfc64..dadc0624a4 100644 --- a/ccmain/cube_reco_context.cpp +++ b/ccmain/cube_reco_context.cpp @@ -131,11 +131,6 @@ bool CubeRecoContext::Load(TessdataManager *tessdata_manager, lang_mod_ = new TessLangModel(lm_params, data_file_path, tess_obj_->getDict().load_system_dawg, tessdata_manager, this); - if (lang_mod_ == NULL) { - fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to create " - "TessLangModel\n"); - return false; - } // Create the optional char bigrams object. char_bigrams_ = CharBigrams::Create(data_file_path, lang_); @@ -176,11 +171,6 @@ CubeRecoContext * CubeRecoContext::Create(Tesseract *tess_obj, UNICHARSET *tess_unicharset) { // create the object CubeRecoContext *cntxt = new CubeRecoContext(tess_obj); - if (cntxt == NULL) { - fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to create " - "CubeRecoContext object\n"); - return NULL; - } // load the necessary components if (cntxt->Load(tessdata_manager, tess_unicharset) == false) { fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init " diff --git a/cube/beam_search.cpp b/cube/beam_search.cpp index da43f8c877..37fc35c566 100644 --- a/cube/beam_search.cpp +++ b/cube/beam_search.cpp @@ -123,11 +123,6 @@ WordAltList * BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) { // alloc memory for columns col_ = new SearchColumn *[col_cnt_]; - if (!col_) { - fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct " - "SearchColumn array\n"); - return NULL; - } memset(col_, 0, col_cnt_ * sizeof(*col_)); // for all possible segments @@ -135,11 +130,6 @@ WordAltList * 
BeamSearch::Search(SearchObject *srch_obj, LangModel *lang_mod) { // create a search column col_[end_seg - 1] = new SearchColumn(end_seg - 1, cntxt_->Params()->BeamWidth()); - if (!col_[end_seg - 1]) { - fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct " - "SearchColumn for column %d\n", end_seg - 1); - return NULL; - } // for all possible start segments int init_seg = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar()); @@ -402,11 +392,6 @@ CharSamp **BeamSearch::SplitByNode(SearchObject *srch_obj, // Allocate memory for CharSamp array. CharSamp **chars = new CharSamp *[*char_cnt]; - if (!chars) { - if (char_boxes) - boxaDestroy(char_boxes); - return NULL; - } int ch_idx = *char_cnt - 1; int seg_pt_cnt = srch_obj->SegPtCnt(); diff --git a/cube/bmp_8.cpp b/cube/bmp_8.cpp index cd09c70754..936d344e40 100644 --- a/cube/bmp_8.cpp +++ b/cube/bmp_8.cpp @@ -72,17 +72,10 @@ unsigned char **Bmp8::CreateBmpBuffer(unsigned char init_val) { stride_ = ((wid_ % 4) == 0) ? wid_ : (4 * (1 + (wid_ / 4))); buff = (unsigned char **) new unsigned char *[hgt_ * sizeof(*buff)]; - if (!buff) { - return NULL; - } // alloc and init memory for buffer and line buffer buff[0] = (unsigned char *) new unsigned char[stride_ * hgt_ * sizeof(*buff[0])]; - if (!buff[0]) { - delete []buff; - return NULL; - } memset(buff[0], init_val, stride_ * hgt_ * sizeof(*buff[0])); @@ -100,16 +93,9 @@ unsigned int ** Bmp8::CreateBmpBuffer(int wid, int hgt, // compute stride (align on 4 byte boundries) buff = (unsigned int **) new unsigned int *[hgt * sizeof(*buff)]; - if (!buff) { - return NULL; - } // alloc and init memory for buffer and line buffer buff[0] = (unsigned int *) new unsigned int[wid * hgt * sizeof(*buff[0])]; - if (!buff[0]) { - delete []buff; - return NULL; - } memset(buff[0], init_val, wid * hgt * sizeof(*buff[0])); @@ -172,9 +158,6 @@ bool Bmp8::LoadFromCharDumpFile(CachedFile *fp) { // alloc memory & read the 3 channel buffer buff = new unsigned char[buf_size]; - if (buff 
== NULL) { - return false; - } if (fp->Read(buff, buf_size) != buf_size) { delete []buff; @@ -213,9 +196,6 @@ bool Bmp8::LoadFromCharDumpFile(CachedFile *fp) { Bmp8 * Bmp8::FromCharDumpFile(CachedFile *fp) { // create a Bmp8 object Bmp8 *bmp_obj = new Bmp8(0, 0); - if (bmp_obj == NULL) { - return NULL; - } if (bmp_obj->LoadFromCharDumpFile(fp) == false) { delete bmp_obj; @@ -267,9 +247,6 @@ bool Bmp8::LoadFromCharDumpFile(FILE *fp) { // alloc memory & read the 3 channel buffer buff = new unsigned char[buf_size]; - if (buff == NULL) { - return false; - } if (fread(buff, 1, buf_size, fp) != buf_size) { delete []buff; @@ -308,9 +285,6 @@ bool Bmp8::LoadFromCharDumpFile(FILE *fp) { Bmp8 * Bmp8::FromCharDumpFile(FILE *fp) { // create a Bmp8 object Bmp8 *bmp_obj = new Bmp8(0, 0); - if (bmp_obj == NULL) { - return NULL; - } if (bmp_obj->LoadFromCharDumpFile(fp) == false) { delete bmp_obj; @@ -545,9 +519,6 @@ bool Bmp8::SaveBmp2CharDumpFile(FILE *fp) const { // alloc memory & write the 3 channel buffer buff = new unsigned char[buf_size]; - if (buff == NULL) { - return false; - } // copy the data for (y = 0, pix = 0; y < hgt_; y++) { @@ -699,7 +670,7 @@ ConComp ** Bmp8::FindConComps(int *concomp_cnt, int min_size) const { // if there was no foreground pix, then create a new concomp if (master_concomp == NULL) { master_concomp = new ConComp(); - if (master_concomp == NULL || master_concomp->Add(x, y) == false) { + if (master_concomp->Add(x, y) == false) { fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not " "allocate or add a connected component\n"); FreeBmpBuffer(out_bmp_array); @@ -711,13 +682,6 @@ ConComp ** Bmp8::FindConComps(int *concomp_cnt, int min_size) const { if ((alloc_concomp_cnt % kConCompAllocChunk) == 0) { ConComp **temp_con_comp = new ConComp *[alloc_concomp_cnt + kConCompAllocChunk]; - if (temp_con_comp == NULL) { - fprintf(stderr, "Cube ERROR (Bmp8::FindConComps): could not " - "extend array of connected components\n"); - 
FreeBmpBuffer(out_bmp_array); - delete []concomp_array; - return NULL; - } if (alloc_concomp_cnt > 0) { memcpy(temp_con_comp, concomp_array, @@ -774,9 +738,6 @@ bool Bmp8::ComputeTanTable() { // alloc memory for tan table delete []tan_table_; tan_table_ = new float[kDeslantAngleCount]; - if (tan_table_ == NULL) { - return false; - } for (ang_idx = 0, ang_val = kMinDeslantAngle; ang_idx < kDeslantAngleCount; ang_idx++) { @@ -821,10 +782,6 @@ bool Bmp8::Deslant() { int **angle_hist = new int*[kDeslantAngleCount]; for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) { angle_hist[ang_idx] = new int[des_wid]; - if (angle_hist[ang_idx] == NULL) { - delete[] angle_hist; - return false; - } memset(angle_hist[ang_idx], 0, des_wid * sizeof(*angle_hist[ang_idx])); } @@ -1006,10 +963,6 @@ bool Bmp8::HorizontalDeslant(double *deslant_angle) { int **angle_hist = new int*[kDeslantAngleCount]; for (ang_idx = 0; ang_idx < kDeslantAngleCount; ang_idx++) { angle_hist[ang_idx] = new int[des_hgt]; - if (angle_hist[ang_idx] == NULL) { - delete[] angle_hist; - return false; - } memset(angle_hist[ang_idx], 0, des_hgt * sizeof(*angle_hist[ang_idx])); } @@ -1118,9 +1071,6 @@ float Bmp8::MeanHorizontalHistogramEntropy() const { int *Bmp8::HorizontalHistogram() const { int *hist = new int[hgt_]; - if (hist == NULL) { - return NULL; - } // compute histograms for (int y = 0; y < hgt_; y++) { diff --git a/cube/cached_file.cpp b/cube/cached_file.cpp index a9a5b2e0b1..84f6f31d66 100644 --- a/cube/cached_file.cpp +++ b/cube/cached_file.cpp @@ -68,9 +68,6 @@ bool CachedFile::Open() { rewind(fp_); // alloc memory for buffer buff_ = new unsigned char[kCacheSize]; - if (buff_ == NULL) { - return false; - } // init counters buff_size_ = 0; buff_pos_ = 0; diff --git a/cube/char_altlist.cpp b/cube/char_altlist.cpp index c0e7776ef2..d4d4b9c543 100644 --- a/cube/char_altlist.cpp +++ b/cube/char_altlist.cpp @@ -56,10 +56,6 @@ bool CharAltList::Insert(int class_id, int cost, void *tag) { alt_cost_ = new 
int[max_alt_]; alt_tag_ = new void *[max_alt_]; - if (class_id_alt_ == NULL || alt_cost_ == NULL || alt_tag_ == NULL) { - return false; - } - memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_)); } @@ -67,9 +63,6 @@ bool CharAltList::Insert(int class_id, int cost, void *tag) { int class_cnt = char_set_->ClassCount(); class_id_cost_ = new int[class_cnt]; - if (class_id_cost_ == NULL) { - return false; - } for (int ich = 0; ich < class_cnt; ich++) { class_id_cost_[ich] = WORST_COST; diff --git a/cube/char_bigrams.cpp b/cube/char_bigrams.cpp index cda869f42b..b005c1d2d4 100644 --- a/cube/char_bigrams.cpp +++ b/cube/char_bigrams.cpp @@ -61,11 +61,6 @@ CharBigrams *CharBigrams::Create(const string &data_file_path, // construct a new object CharBigrams *char_bigrams_obj = new CharBigrams(); - if (char_bigrams_obj == NULL) { - fprintf(stderr, "Cube ERROR (CharBigrams::Create): could not create " - "character bigrams object.\n"); - return NULL; - } CharBigramTable *table = &char_bigrams_obj->bigram_table_; table->total_cnt = 0; @@ -90,11 +85,6 @@ CharBigrams *CharBigrams::Create(const string &data_file_path, // expand the bigram table if (ch1 > table->max_char) { CharBigram *char_bigram = new CharBigram[ch1 + 1]; - if (char_bigram == NULL) { - fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating " - "additional memory for character bigram table.\n"); - return NULL; - } if (table->char_bigram != NULL && table->max_char >= 0) { memcpy(char_bigram, table->char_bigram, @@ -115,12 +105,6 @@ CharBigrams *CharBigrams::Create(const string &data_file_path, if (ch2 > table->char_bigram[ch1].max_char) { Bigram *bigram = new Bigram[ch2 + 1]; - if (bigram == NULL) { - fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating " - "memory for bigram.\n"); - delete char_bigrams_obj; - return NULL; - } if (table->char_bigram[ch1].bigram != NULL && table->char_bigram[ch1].max_char >= 0) { diff --git a/cube/char_samp.cpp b/cube/char_samp.cpp index 4c4059d6bc..4b9887e4e4 
100644 --- a/cube/char_samp.cpp +++ b/cube/char_samp.cpp @@ -106,9 +106,6 @@ CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) { // the label is not null terminated in the file if (val32 > 0 && val32 < MAX_UINT32) { label32 = new char_32[val32 + 1]; - if (label32 == NULL) { - return NULL; - } // read label if (fp->Read(label32, val32 * sizeof(*label32)) != (val32 * sizeof(*label32))) { @@ -156,10 +153,6 @@ CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) { } // create the object CharSamp *char_samp = new CharSamp(); - if (char_samp == NULL) { - delete [] label32; - return NULL; - } // init char_samp->label32_ = label32; char_samp->page_ = page; @@ -206,9 +199,6 @@ CharSamp *CharSamp::FromCharDumpFile(FILE *fp) { // the label is not null terminated in the file if (val32 > 0 && val32 < MAX_UINT32) { label32 = new char_32[val32 + 1]; - if (label32 == NULL) { - return NULL; - } // read label if (fread(label32, 1, val32 * sizeof(*label32), fp) != (val32 * sizeof(*label32))) { @@ -235,10 +225,6 @@ CharSamp *CharSamp::FromCharDumpFile(FILE *fp) { } // create the object CharSamp *char_samp = new CharSamp(); - if (char_samp == NULL) { - delete [] label32; - return NULL; - } // init char_samp->label32_ = label32; char_samp->page_ = page; @@ -261,9 +247,6 @@ CharSamp *CharSamp::FromCharDumpFile(FILE *fp) { // specified width and height CharSamp *CharSamp::Scale(int wid, int hgt, bool isotropic) { CharSamp *scaled_samp = new CharSamp(wid, hgt); - if (scaled_samp == NULL) { - return NULL; - } if (scaled_samp->ScaleFrom(this, isotropic) == false) { delete scaled_samp; return NULL; @@ -285,9 +268,6 @@ CharSamp *CharSamp::FromRawData(int left, int top, int wid, int hgt, unsigned char *data) { // create the object CharSamp *char_samp = new CharSamp(left, top, wid, hgt); - if (char_samp == NULL) { - return NULL; - } if (char_samp->LoadFromRawData(data) == false) { delete char_samp; return NULL; @@ -432,14 +412,6 @@ ConComp **CharSamp::Segment(int *segment_cnt, bool 
right_2_left, if ((seg_cnt % kConCompAllocChunk) == 0) { ConComp **temp_segm_array = new ConComp *[seg_cnt + kConCompAllocChunk]; - if (temp_segm_array == NULL) { - fprintf(stderr, "Cube ERROR (CharSamp::Segment): could not " - "allocate additional connected components\n"); - delete []concomp_seg_array; - delete []concomp_array; - delete []seg_array; - return NULL; - } if (seg_cnt > 0) { memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array)); delete []seg_array; @@ -497,8 +469,6 @@ CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp, bool *id_exist = new bool[id_cnt]; bool *left_most_exist = new bool[id_cnt]; bool *right_most_exist = new bool[id_cnt]; - if (!id_exist || !left_most_exist || !right_most_exist) - return NULL; memset(id_exist, 0, id_cnt * sizeof(*id_exist)); memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist)); memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist)); @@ -555,9 +525,6 @@ CharSamp *CharSamp::FromConComps(ConComp **concomp_array, int strt_concomp, (*right_most) = (unq_right_most >= unq_ids); // create the char sample object CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1); - if (!samp) { - return NULL; - } // set the foreground pixels for (concomp = strt_concomp; concomp < end_concomp; concomp++) { @@ -605,9 +572,6 @@ CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) { // the label is not null terminated in the file if (val32 > 0 && val32 < MAX_UINT32) { label32 = new char_32[val32 + 1]; - if (label32 == NULL) { - return NULL; - } // read label memcpy(label32, raw_data, val32 * sizeof(*label32)); raw_data += (val32 * sizeof(*label32)); @@ -619,9 +583,6 @@ CharSamp *CharSamp::FromCharDumpFile(unsigned char **raw_data_ptr) { // create the object CharSamp *char_samp = new CharSamp(); - if (char_samp == NULL) { - return NULL; - } // read coordinates char_samp->label32_ = label32; diff --git a/cube/char_samp.h b/cube/char_samp.h index 
a3c3063bd6..827e1c37c8 100644 --- a/cube/char_samp.h +++ b/cube/char_samp.h @@ -66,20 +66,14 @@ class CharSamp : public Bmp8 { void SetTop(unsigned short top) { top_ = top; } void SetPage(unsigned short page) { page_ = page; } void SetLabel(char_32 label) { - if (label32_ != NULL) { - delete []label32_; - } + delete []label32_; label32_ = new char_32[2]; - if (label32_ != NULL) { - label32_[0] = label; - label32_[1] = 0; - } + label32_[0] = label; + label32_[1] = 0; } void SetLabel(const char_32 *label32) { - if (label32_ != NULL) { - delete []label32_; - label32_ = NULL; - } + delete []label32_; + label32_ = NULL; if (label32 != NULL) { // remove any byte order marks if any if (label32[0] == 0xfeff) { @@ -87,10 +81,8 @@ class CharSamp : public Bmp8 { } int len = LabelLen(label32); label32_ = new char_32[len + 1]; - if (label32_ != NULL) { - memcpy(label32_, label32, len * sizeof(*label32)); - label32_[len] = 0; - } + memcpy(label32_, label32, len * sizeof(*label32)); + label32_[len] = 0; } } void SetLabel(string str); diff --git a/cube/char_samp_set.cpp b/cube/char_samp_set.cpp index 1e212b1957..9af7581187 100644 --- a/cube/char_samp_set.cpp +++ b/cube/char_samp_set.cpp @@ -55,9 +55,6 @@ bool CharSampSet::Add(CharSamp *char_samp) { // create an extended buffer CharSamp **new_samp_buff = reinterpret_cast(new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK]); - if (new_samp_buff == NULL) { - return false; - } // copy old contents if (cnt_ > 0) { memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_)); @@ -107,10 +104,6 @@ CharSampSet * CharSampSet::FromCharDumpFile(string file_name) { } // create an object CharSampSet *samp_set = new CharSampSet(); - if (samp_set == NULL) { - fclose(fp); - return NULL; - } if (samp_set->LoadCharSamples(fp) == false) { delete samp_set; samp_set = NULL; @@ -146,9 +139,6 @@ bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) { i64_pos; // open the file fp_in = new CachedFile(file_name); - if (fp_in == NULL) { - return 
false; - } i64_size = fp_in->Size(); if (i64_size < 1) { return false; diff --git a/cube/char_set.cpp b/cube/char_set.cpp index 1414d640f4..b2ec5f544a 100644 --- a/cube/char_set.cpp +++ b/cube/char_set.cpp @@ -54,9 +54,6 @@ CharSet::~CharSet() { CharSet *CharSet::Create(TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset) { CharSet *char_set = new CharSet(); - if (char_set == NULL) { - return NULL; - } // First look for Cube's unicharset; if not there, use tesseract's bool cube_unicharset_exists; @@ -119,19 +116,9 @@ bool CharSet::LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset) { } // memory for class strings class_strings_ = new string_32*[class_cnt_]; - if (class_strings_ == NULL) { - fprintf(stderr, "Cube ERROR (CharSet::InitMemory): could not " - "allocate memory for class strings.\n"); - return false; - } // memory for unicharset map if (tess_unicharset) { unicharset_map_ = new int[class_cnt_]; - if (unicharset_map_ == NULL) { - fprintf(stderr, "Cube ERROR (CharSet::InitMemory): could not " - "allocate memory for unicharset map.\n"); - return false; - } } // Read in character strings and add to hash table @@ -154,11 +141,6 @@ bool CharSet::LoadSupportedCharList(FILE *fp, UNICHARSET *tess_unicharset) { } CubeUtils::UTF8ToUTF32(str_line, &str32); class_strings_[class_id] = new string_32(str32); - if (class_strings_[class_id] == NULL) { - fprintf(stderr, "Cube ERROR (CharSet::ReadAndHashStrings): could not " - "allocate memory for class string with class_id=%d.\n", class_id); - return false; - } // Add to hash-table int hash_val = Hash(reinterpret_cast(str32.c_str())); diff --git a/cube/classifier_factory.cpp b/cube/classifier_factory.cpp index a22f0d4ea8..04df263911 100644 --- a/cube/classifier_factory.cpp +++ b/cube/classifier_factory.cpp @@ -56,12 +56,6 @@ CharClassifier *CharClassifierFactory::Create(const string &data_file_path, return NULL; } - if (feat_extract == NULL) { - fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): 
unable " - "to instantiate feature extraction object.\n"); - return NULL; - } - // create the classifier object CharClassifier *classifier_obj; switch (params->TypeClassifier()) { @@ -79,12 +73,6 @@ CharClassifier *CharClassifierFactory::Create(const string &data_file_path, return NULL; } - if (classifier_obj == NULL) { - fprintf(stderr, "Cube ERROR (CharClassifierFactory::Create): error " - "allocating memory for character classifier object.\n"); - return NULL; - } - // Init the classifier if (!classifier_obj->Init(data_file_path, lang, lang_mod)) { delete classifier_obj; diff --git a/cube/con_comp.cpp b/cube/con_comp.cpp index 53b1a73b43..a0a926dbd8 100644 --- a/cube/con_comp.cpp +++ b/cube/con_comp.cpp @@ -52,9 +52,6 @@ ConComp::~ConComp() { // adds a pt to the conn comp and updates its boundaries bool ConComp::Add(int x, int y) { ConCompPt *pt_ptr = new ConCompPt(x, y); - if (pt_ptr == NULL) { - return false; - } if (head_ == NULL) { left_ = x; @@ -114,9 +111,6 @@ int *ConComp::CreateHistogram(int max_hist_wnd) { // alloc memo for histogram int *hist_array = new int[wid]; - if (hist_array == NULL) { - return NULL; - } memset(hist_array, 0, wid * sizeof(*hist_array)); @@ -148,9 +142,6 @@ int *ConComp::SegmentHistogram(int *hist_array, int *seg_pt_cnt) { hgt = bottom_ - top_ + 1; int *x_seg_pt = new int[wid]; - if (x_seg_pt == NULL) { - return NULL; - } int seg_pt_wnd = static_cast(hgt * SEG_PT_WND_RATIO); @@ -216,18 +207,9 @@ ConComp **ConComp::Segment(int max_hist_wnd, int *concomp_cnt) { // create concomp array ConComp **concomp_array = new ConComp *[seg_pt_cnt + 1]; - if (concomp_array == NULL) { - delete []x_seg_pt; - return NULL; - } for (int concomp = 0; concomp <= seg_pt_cnt; concomp++) { concomp_array[concomp] = new ConComp(); - if (concomp_array[concomp] == NULL) { - delete []x_seg_pt; - delete []concomp_array; - return NULL; - } // split concomps inherit the ID this concomp concomp_array[concomp]->SetID(id_); diff --git a/cube/conv_net_classifier.cpp 
b/cube/conv_net_classifier.cpp index ac33cd33b1..e4846ac381 100644 --- a/cube/conv_net_classifier.cpp +++ b/cube/conv_net_classifier.cpp @@ -147,18 +147,7 @@ bool ConvNetCharClassifier::RunNets(CharSamp *char_samp) { // allocate i/p and o/p buffers if needed if (net_input_ == NULL) { net_input_ = new float[feat_cnt]; - if (net_input_ == NULL) { - fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): " - "unable to allocate memory for input nodes\n"); - return false; - } - net_output_ = new float[class_cnt]; - if (net_output_ == NULL) { - fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::RunNets): " - "unable to allocate memory for output nodes\n"); - return false; - } } // compute input features @@ -205,11 +194,6 @@ CharAltList *ConvNetCharClassifier::Classify(CharSamp *char_samp) { // create an altlist CharAltList *alt_list = new CharAltList(char_set_, class_cnt); - if (alt_list == NULL) { - fprintf(stderr, "Cube WARNING (ConvNetCharClassifier::Classify): " - "returning emtpy CharAltList\n"); - return NULL; - } for (int out = 1; out < class_cnt; out++) { int cost = CubeUtils::Prob2Cost(net_output_[out]); @@ -261,14 +245,7 @@ bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path, fold_set_cnt_ = str_vec.size(); fold_sets_ = new int *[fold_set_cnt_]; - if (fold_sets_ == NULL) { - return false; - } fold_set_len_ = new int[fold_set_cnt_]; - if (fold_set_len_ == NULL) { - fold_set_cnt_ = 0; - return false; - } for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) { reinterpret_cast(lang_mod)->RemoveInvalidCharacters( @@ -287,12 +264,6 @@ bool ConvNetCharClassifier::LoadFoldingSets(const string &data_file_path, CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32); fold_set_len_[fold_set] = str32.length(); fold_sets_[fold_set] = new int[fold_set_len_[fold_set]]; - if (fold_sets_[fold_set] == NULL) { - fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadFoldingSets): " - "could not allocate folding set\n"); - fold_set_cnt_ = 
fold_set; - return false; - } for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) { fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]); } @@ -375,14 +346,7 @@ bool ConvNetCharClassifier::LoadNets(const string &data_file_path, // allocate i/p and o/p buffers if needed if (net_input_ == NULL) { net_input_ = new float[feat_cnt]; - if (net_input_ == NULL) { - return false; - } - net_output_ = new float[class_cnt]; - if (net_output_ == NULL) { - return false; - } } return true; diff --git a/cube/cube_line_object.cpp b/cube/cube_line_object.cpp index 0325453740..72fd87ff2b 100644 --- a/cube/cube_line_object.cpp +++ b/cube/cube_line_object.cpp @@ -91,68 +91,62 @@ bool CubeLineObject::Process() { if (word_break_threshold > 0) { // over-allocate phrases object buffer phrases_ = new CubeObject *[con_comp_cnt]; - if (phrases_ != NULL) { - // create a phrase if the horizontal distance between two consecutive - // concomps is higher than threshold - int start_con_idx = 0; - int current_phrase_limit = rtl ? con_comps[0]->Left() : - con_comps[0]->Right(); - - for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) { - bool create_new_phrase = true; - // if not at the end, compute the distance between two consecutive - // concomps - if (con_idx < con_comp_cnt) { - int dist = 0; - if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) { - dist = current_phrase_limit - con_comps[con_idx]->Right(); - } else { - dist = con_comps[con_idx]->Left() - current_phrase_limit; - } - create_new_phrase = (dist > word_break_threshold); + // create a phrase if the horizontal distance between two consecutive + // concomps is higher than threshold + int start_con_idx = 0; + int current_phrase_limit = rtl ? 
con_comps[0]->Left() : + con_comps[0]->Right(); + + for (int con_idx = 1; con_idx <= con_comp_cnt; con_idx++) { + bool create_new_phrase = true; + // if not at the end, compute the distance between two consecutive + // concomps + if (con_idx < con_comp_cnt) { + int dist = 0; + if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) { + dist = current_phrase_limit - con_comps[con_idx]->Right(); + } else { + dist = con_comps[con_idx]->Left() - current_phrase_limit; } + create_new_phrase = (dist > word_break_threshold); + } - // create a new phrase - if (create_new_phrase) { - // create a phrase corresponding to a range on components - bool left_most; - bool right_most; - CharSamp *phrase_char_samp = - CharSamp::FromConComps(con_comps, start_con_idx, - con_idx - start_con_idx, NULL, - &left_most, &right_most, - line_pix_->h); - if (phrase_char_samp == NULL) { - break; - } - phrases_[phrase_cnt_] = new CubeObject(cntxt_, phrase_char_samp); - if (phrases_[phrase_cnt_] == NULL) { - delete phrase_char_samp; - break; - } - // set the ownership of the charsamp to the cube object - phrases_[phrase_cnt_]->SetCharSampOwnership(true); - phrase_cnt_++; - // advance the starting index to the current index - start_con_idx = con_idx; - // set the limit of the newly starting phrase (if any) - if (con_idx < con_comp_cnt) { - current_phrase_limit = rtl ? 
con_comps[con_idx]->Left() : - con_comps[con_idx]->Right(); - } + // create a new phrase + if (create_new_phrase) { + // create a phrase corresponding to a range on components + bool left_most; + bool right_most; + CharSamp *phrase_char_samp = + CharSamp::FromConComps(con_comps, start_con_idx, + con_idx - start_con_idx, NULL, + &left_most, &right_most, + line_pix_->h); + if (phrase_char_samp == NULL) { + break; + } + phrases_[phrase_cnt_] = new CubeObject(cntxt_, phrase_char_samp); + // set the ownership of the charsamp to the cube object + phrases_[phrase_cnt_]->SetCharSampOwnership(true); + phrase_cnt_++; + // advance the starting index to the current index + start_con_idx = con_idx; + // set the limit of the newly starting phrase (if any) + if (con_idx < con_comp_cnt) { + current_phrase_limit = rtl ? con_comps[con_idx]->Left() : + con_comps[con_idx]->Right(); + } + } else { + // update the limit of the current phrase + if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) { + current_phrase_limit = MIN(current_phrase_limit, + con_comps[con_idx]->Left()); } else { - // update the limit of the current phrase - if (cntxt_->ReadingOrder() == tesseract::CubeRecoContext::R2L) { - current_phrase_limit = MIN(current_phrase_limit, - con_comps[con_idx]->Left()); - } else { - current_phrase_limit = MAX(current_phrase_limit, - con_comps[con_idx]->Right()); - } + current_phrase_limit = MAX(current_phrase_limit, + con_comps[con_idx]->Right()); } } - ret_val = true; } + ret_val = true; } // clean-up connected comps diff --git a/cube/cube_line_segmenter.cpp b/cube/cube_line_segmenter.cpp index 278011f090..4b75dca2cd 100644 --- a/cube/cube_line_segmenter.cpp +++ b/cube/cube_line_segmenter.cpp @@ -126,9 +126,6 @@ Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix, Box *cracked_line_box, int line_cnt) { // create lines pixa array Pixa **lines_pixa = new Pixa*[line_cnt]; - if (lines_pixa == NULL) { - return NULL; - } memset(lines_pixa, 0, line_cnt * 
sizeof(*lines_pixa)); @@ -620,9 +617,6 @@ bool CubeLineSegmenter::AddLines(Pixa *lines) { // Index the specific pixa using RTL reading order int *CubeLineSegmenter::IndexRTL(Pixa *pixa) { int *pix_index = new int[pixa->n]; - if (pix_index == NULL) { - return NULL; - } for (int pix = 0; pix < pixa->n; pix++) { pix_index[pix] = pix; diff --git a/cube/cube_object.cpp b/cube/cube_object.cpp index ca66216e38..4d95f71ba2 100644 --- a/cube/cube_object.cpp +++ b/cube/cube_object.cpp @@ -115,21 +115,11 @@ WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) { // create a beam search object if (beam_obj_ == NULL) { beam_obj_ = new BeamSearch(cntxt_, word_mode); - if (beam_obj_ == NULL) { - fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct " - "BeamSearch\n"); - return NULL; - } } // create a cube search object if (srch_obj_ == NULL) { srch_obj_ = new CubeSearchObject(cntxt_, char_samp_); - if (srch_obj_ == NULL) { - fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not construct " - "CubeSearchObject\n"); - return NULL; - } } // run a beam search against the tesslang model @@ -142,11 +132,6 @@ WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) { if (deslanted_beam_obj_ == NULL) { deslanted_beam_obj_ = new BeamSearch(cntxt_); - if (deslanted_beam_obj_ == NULL) { - fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " - "construct deslanted BeamSearch\n"); - return NULL; - } } if (deslanted_srch_obj_ == NULL) { @@ -162,11 +147,6 @@ WordAltList *CubeObject::Recognize(LangModel *lang_mod, bool word_mode) { } deslanted_srch_obj_ = new CubeSearchObject(cntxt_, deslanted_char_samp_); - if (deslanted_srch_obj_ == NULL) { - fprintf(stderr, "Cube ERROR (CubeObject::Recognize): could not " - "construct deslanted CubeSearchObject\n"); - return NULL; - } } // run a beam search against the tesslang model @@ -205,9 +185,6 @@ WordAltList *CubeObject::RecognizePhrase(LangModel *lang_mod) { */ int 
CubeObject::WordCost(const char *str) { WordListLangModel *lang_mod = new WordListLangModel(cntxt_); - if (lang_mod == NULL) { - return WORST_COST; - } if (lang_mod->AddString(str) == false) { delete lang_mod; @@ -242,9 +219,6 @@ CharAltList *CubeObject::RecognizeChar() { bool CubeObject::Normalize() { // create a cube search object CubeSearchObject *srch_obj = new CubeSearchObject(cntxt_, char_samp_); - if (srch_obj == NULL) { - return false; - } // Perform over-segmentation int seg_cnt = srch_obj->SegPtCnt(); // Only perform normalization if segment count is large enough diff --git a/cube/cube_search_object.cpp b/cube/cube_search_object.cpp index 731dd35276..ad807193d5 100644 --- a/cube/cube_search_object.cpp +++ b/cube/cube_search_object.cpp @@ -127,36 +127,14 @@ bool CubeSearchObject::Init() { // init cache reco_cache_ = new CharAltList **[segment_cnt_]; - if (reco_cache_ == NULL) { - fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " - "allocate CharAltList array\n"); - return false; - } samp_cache_ = new CharSamp **[segment_cnt_]; - if (samp_cache_ == NULL) { - fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " - "allocate CharSamp array\n"); - return false; - } for (int seg = 0; seg < segment_cnt_; seg++) { reco_cache_[seg] = new CharAltList *[segment_cnt_]; - if (reco_cache_[seg] == NULL) { - fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " - "allocate a single segment's CharAltList array\n"); - return false; - } - memset(reco_cache_[seg], 0, segment_cnt_ * sizeof(*reco_cache_[seg])); samp_cache_[seg] = new CharSamp *[segment_cnt_]; - if (samp_cache_[seg] == NULL) { - fprintf(stderr, "Cube ERROR (CubeSearchObject::Init): could not " - "allocate a single segment's CharSamp array\n"); - return false; - } - memset(samp_cache_[seg], 0, segment_cnt_ * sizeof(*samp_cache_[seg])); } @@ -305,12 +283,10 @@ CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) { exp(-fabs(seg_cnt - 2.0)) * 
exp(-samp->Width() / static_cast(samp->Height())); - if (alt_list) { - for (int class_idx = 0; class_idx < class_cnt; class_idx++) { - alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val)); - } - reco_cache_[start_pt + 1][end_pt] = alt_list; + for (int class_idx = 0; class_idx < class_cnt; class_idx++) { + alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val)); } + reco_cache_[start_pt + 1][end_pt] = alt_list; } return reco_cache_[start_pt + 1][end_pt]; @@ -353,11 +329,6 @@ bool CubeSearchObject::ComputeSpaceCosts() { // segmentation point int *max_left_x = new int[segment_cnt_ - 1]; int *min_right_x = new int[segment_cnt_ - 1]; - if (!max_left_x || !min_right_x) { - delete []min_right_x; - delete []max_left_x; - return false; - } if (rtl_) { min_right_x[0] = segments_[0]->Left(); max_left_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Right(); @@ -384,11 +355,6 @@ bool CubeSearchObject::ComputeSpaceCosts() { // trivial cases space_cost_ = new int[segment_cnt_ - 1]; no_space_cost_ = new int[segment_cnt_ - 1]; - if (!space_cost_ || !no_space_cost_) { - delete []min_right_x; - delete []max_left_x; - return false; - } // go through all segmentation points determining the horizontal gap between // the images on both sides of each break points. 
Use the gap to estimate diff --git a/cube/cube_tuning_params.cpp b/cube/cube_tuning_params.cpp index ac16c9f5cb..e4a9b0cf02 100644 --- a/cube/cube_tuning_params.cpp +++ b/cube/cube_tuning_params.cpp @@ -54,11 +54,6 @@ CubeTuningParams::~CubeTuningParams() { CubeTuningParams *CubeTuningParams::Create(const string &data_file_path, const string &lang) { CubeTuningParams *obj = new CubeTuningParams(); - if (!obj) { - fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to " - "allocate new tuning params object\n"); - return NULL; - } string tuning_params_file; tuning_params_file = data_file_path + lang; diff --git a/cube/cube_utils.cpp b/cube/cube_utils.cpp index 13c9c236da..4741659d2a 100644 --- a/cube/cube_utils.cpp +++ b/cube/cube_utils.cpp @@ -90,9 +90,6 @@ int CubeUtils::StrCmp(const char_32 *str1, const char_32 *str2) { char_32 *CubeUtils::StrDup(const char_32 *str32) { int len = StrLen(str32); char_32 *new_str = new char_32[len + 1]; - if (new_str == NULL) { - return NULL; - } memcpy(new_str, str32, len * sizeof(*str32)); new_str[len] = 0; return new_str; @@ -165,9 +162,6 @@ unsigned char *CubeUtils::GetImageData(Pix *pix, int left, int top, // copy the char img to a temp buffer unsigned char *temp_buff = new unsigned char[wid * hgt]; - if (temp_buff == NULL) { - return NULL; - } l_int32 w; l_int32 h; l_int32 d; @@ -211,10 +205,6 @@ bool CubeUtils::ReadFileToString(const string &file_name, string *str) { // read the contents rewind(fp); char *buff = new char[file_size]; - if (buff == NULL) { - fclose(fp); - return false; - } int read_bytes = fread(buff, 1, static_cast(file_size), fp); if (read_bytes == file_size) { str->append(buff, file_size); @@ -352,8 +342,6 @@ char_32 *CubeUtils::ToLower(const char_32 *str32, CharSet *char_set) { UNICHARSET *unicharset = char_set->InternalUnicharset(); int len = StrLen(str32); char_32 *lower = new char_32[len + 1]; - if (!lower) - return NULL; for (int i = 0; i < len; ++i) { char_32 ch = str32[i]; if (ch == 
INVALID_UNICHAR_ID) { @@ -385,8 +373,6 @@ char_32 *CubeUtils::ToUpper(const char_32 *str32, CharSet *char_set) { UNICHARSET *unicharset = char_set->InternalUnicharset(); int len = StrLen(str32); char_32 *upper = new char_32[len + 1]; - if (!upper) - return NULL; for (int i = 0; i < len; ++i) { char_32 ch = str32[i]; if (ch == INVALID_UNICHAR_ID) { diff --git a/cube/hybrid_neural_net_classifier.cpp b/cube/hybrid_neural_net_classifier.cpp index 9aa3026d8b..29b50d0cbc 100644 --- a/cube/hybrid_neural_net_classifier.cpp +++ b/cube/hybrid_neural_net_classifier.cpp @@ -136,14 +136,7 @@ bool HybridNeuralNetCharClassifier::RunNets(CharSamp *char_samp) { // allocate i/p and o/p buffers if needed if (net_input_ == NULL) { net_input_ = new float[feat_cnt]; - if (net_input_ == NULL) { - return false; - } - net_output_ = new float[class_cnt]; - if (net_output_ == NULL) { - return false; - } } // compute input features @@ -196,9 +189,6 @@ CharAltList *HybridNeuralNetCharClassifier::Classify(CharSamp *char_samp) { // create an altlist CharAltList *alt_list = new CharAltList(char_set_, class_cnt); - if (alt_list == NULL) { - return NULL; - } for (int out = 1; out < class_cnt; out++) { int cost = CubeUtils::Prob2Cost(net_output_[out]); @@ -240,14 +230,7 @@ bool HybridNeuralNetCharClassifier::LoadFoldingSets( CubeUtils::SplitStringUsing(fold_sets_str, "\r\n", &str_vec); fold_set_cnt_ = str_vec.size(); fold_sets_ = new int *[fold_set_cnt_]; - if (fold_sets_ == NULL) { - return false; - } fold_set_len_ = new int[fold_set_cnt_]; - if (fold_set_len_ == NULL) { - fold_set_cnt_ = 0; - return false; - } for (int fold_set = 0; fold_set < fold_set_cnt_; fold_set++) { reinterpret_cast(lang_mod)->RemoveInvalidCharacters( @@ -266,12 +249,6 @@ bool HybridNeuralNetCharClassifier::LoadFoldingSets( CubeUtils::UTF8ToUTF32(str_vec[fold_set].c_str(), &str32); fold_set_len_[fold_set] = str32.length(); fold_sets_[fold_set] = new int[fold_set_len_[fold_set]]; - if (fold_sets_[fold_set] == NULL) { - 
fprintf(stderr, "Cube ERROR (ConvNetCharClassifier::LoadFoldingSets): " - "could not allocate folding set\n"); - fold_set_cnt_ = fold_set; - return false; - } for (int ch = 0; ch < fold_set_len_[fold_set]; ch++) { fold_sets_[fold_set][ch] = char_set_->ClassID(str32[ch]); } diff --git a/cube/search_column.cpp b/cube/search_column.cpp index e13149d9f5..71f2222337 100644 --- a/cube/search_column.cpp +++ b/cube/search_column.cpp @@ -62,9 +62,6 @@ bool SearchColumn::Init() { // create hash table if (node_hash_table_ == NULL) { node_hash_table_ = new SearchNodeHashTable(); - if (node_hash_table_ == NULL) { - return false; - } } init_ = true; @@ -144,9 +141,6 @@ SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost, // node does not exist if (new_node == NULL) { new_node = new SearchNode(cntxt, parent_node, reco_cost, edge, col_idx_); - if (new_node == NULL) { - return NULL; - } // if the max node count has already been reached, check if the cost of // the new node exceeds the max cost. 
This indicates that it will be pruned @@ -161,10 +155,6 @@ SearchNode *SearchColumn::AddNode(LangModEdge *edge, int reco_cost, // alloc a new buff SearchNode **new_node_buff = new SearchNode *[node_cnt_ + kNodeAllocChunk]; - if (new_node_buff == NULL) { - delete new_node; - return NULL; - } // free existing after copying contents if (node_array_ != NULL) { diff --git a/cube/search_node.cpp b/cube/search_node.cpp index ff5bfbd844..cd46625023 100644 --- a/cube/search_node.cpp +++ b/cube/search_node.cpp @@ -147,9 +147,6 @@ char_32 *SearchNode::PathString() { } char_32 *char_ptr = new char_32[len + 1]; - if (char_ptr == NULL) { - return NULL; - } int ch_idx = len; diff --git a/cube/tess_lang_mod_edge.cpp b/cube/tess_lang_mod_edge.cpp index 4d16f3ac28..911070d3e6 100644 --- a/cube/tess_lang_mod_edge.cpp +++ b/cube/tess_lang_mod_edge.cpp @@ -72,9 +72,6 @@ TessLangModEdge::TessLangModEdge(CubeRecoContext *cntxt, const Dawg *dawg, char *TessLangModEdge::Description() const { char *char_ptr = new char[256]; - if (!char_ptr) { - return NULL; - } char dawg_str[256]; char edge_str[32]; @@ -115,9 +112,8 @@ int TessLangModEdge::CreateChildren(CubeRecoContext *cntxt, for (int i = 0; i < vec.size(); ++i) { const NodeChild &child = vec[i]; if (child.unichar_id == INVALID_UNICHAR_ID) continue; - edge_array[edge_cnt] = + edge_array[edge_cnt++] = new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id); - if (edge_array[edge_cnt] != NULL) edge_cnt++; } return edge_cnt; } diff --git a/cube/tess_lang_model.cpp b/cube/tess_lang_model.cpp index 5113207260..3a4c7500d7 100644 --- a/cube/tess_lang_model.cpp +++ b/cube/tess_lang_model.cpp @@ -182,9 +182,6 @@ LangModEdge ** TessLangModel::GetEdges(CharAltList *alt_list, // preallocate the edge buffer (*edge_cnt) = dawg_cnt * max_edge_; edge_array = new LangModEdge *[(*edge_cnt)]; - if (edge_array == NULL) { - return NULL; - } for (int dawg_idx = (*edge_cnt) = 0; dawg_idx < dawg_cnt; dawg_idx++) { const Dawg *curr_dawg = 
GetDawg(dawg_idx); @@ -213,9 +210,6 @@ LangModEdge ** TessLangModel::GetEdges(CharAltList *alt_list, (*edge_cnt) = max_edge_; // allocate memory for edges edge_array = new LangModEdge *[(*edge_cnt)]; - if (edge_array == NULL) { - return NULL; - } // get the FanOut edges from the root of each dawg (*edge_cnt) = FanOut(alt_list, @@ -240,9 +234,6 @@ int TessLangModel::Edges(const char *strng, const Dawg *dawg, // create an edge object edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg, edge_ref, class_id); - if (edge_array[edge_cnt] == NULL) { - return 0; - } reinterpret_cast(edge_array[edge_cnt])-> SetEdgeMask(edge_mask); @@ -264,10 +255,6 @@ int TessLangModel::OODEdges(CharAltList *alt_list, EDGE_REF edge_ref, alt_list->ClassCost(class_id) <= max_ood_shape_cost_)) { // create an edge object edge_array[edge_cnt] = new TessLangModEdge(cntxt_, class_id); - if (edge_array[edge_cnt] == NULL) { - return 0; - } - edge_cnt++; } } @@ -368,11 +355,9 @@ int TessLangModel::FanOut(CharAltList *alt_list, const Dawg *dawg, edge_array[edge_cnt] = new TessLangModEdge(cntxt_, dawg, child_edge->StartEdge(), child_edge->EndEdge(), class_id); - if (edge_array[edge_cnt] != NULL) { - reinterpret_cast(edge_array[edge_cnt])-> + reinterpret_cast(edge_array[edge_cnt])-> SetEdgeMask(edge_mask); - edge_cnt++; - } + edge_cnt++; } } } @@ -486,8 +471,6 @@ void TessLangModel::RemoveInvalidCharacters(string *lm_str) { int len = CubeUtils::StrLen(lm_str32.c_str()); char_32 *clean_str32 = new char_32[len + 1]; - if (!clean_str32) - return; int clean_len = 0; for (int i = 0; i < len; ++i) { int class_id = char_set->ClassID((char_32)lm_str32[i]); diff --git a/cube/word_altlist.cpp b/cube/word_altlist.cpp index d6775360ad..f91d56c996 100644 --- a/cube/word_altlist.cpp +++ b/cube/word_altlist.cpp @@ -45,11 +45,6 @@ bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) { word_alt_ = new char_32*[max_alt_]; alt_cost_ = new int[max_alt_]; alt_tag_ = new void *[max_alt_]; - - if (word_alt_ == 
NULL || alt_cost_ == NULL || alt_tag_ == NULL) { - return false; - } - memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_)); } else { // check if alt already exists @@ -69,9 +64,6 @@ bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) { int len = CubeUtils::StrLen(word_str); word_alt_[alt_cnt_] = new char_32[len + 1]; - if (word_alt_[alt_cnt_] == NULL) { - return false; - } if (len > 0) { memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str)); diff --git a/cube/word_list_lang_model.cpp b/cube/word_list_lang_model.cpp index 67a6a5a985..bb07951d25 100644 --- a/cube/word_list_lang_model.cpp +++ b/cube/word_list_lang_model.cpp @@ -54,9 +54,6 @@ bool WordListLangModel::Init() { // false for now, until Cube has a way to express its preferred debug level. dawg_ = new Trie(DAWG_TYPE_WORD, "", NO_PERM, cntxt_->CharacterSet()->ClassCount(), false); - if (dawg_ == NULL) { - return false; - } init_ = true; return true; } @@ -97,9 +94,6 @@ LangModEdge **WordListLangModel::GetEdges(CharAltList *alt_list, // allocate memory for edges LangModEdge **edge_array = new LangModEdge *[kMaxEdge]; - if (edge_array == NULL) { - return NULL; - } // now get all the emerging edges (*edge_cnt) += TessLangModEdge::CreateChildren(cntxt_, dawg_, edge_ref, diff --git a/cube/word_size_model.cpp b/cube/word_size_model.cpp index 6b9a4530fc..be3ccf734d 100644 --- a/cube/word_size_model.cpp +++ b/cube/word_size_model.cpp @@ -43,11 +43,6 @@ WordSizeModel *WordSizeModel::Create(const string &data_file_path, CharSet *char_set, bool contextual) { WordSizeModel *obj = new WordSizeModel(char_set, contextual); - if (!obj) { - fprintf(stderr, "Cube ERROR (WordSizeModel::Create): unable to allocate " - "new word size model object\n"); - return NULL; - } if (!obj->Init(data_file_path, lang)) { delete obj; @@ -96,19 +91,9 @@ bool WordSizeModel::Init(const string &data_file_path, const string &lang) { FontPairSizeInfo fnt_info; fnt_info.pair_size_info = new PairSizeInfo *[size_class_cnt]; - if 
(!fnt_info.pair_size_info) { - fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allcoating " - "memory for font pair size info\n"); - return false; - } fnt_info.pair_size_info[0] = new PairSizeInfo[size_class_cnt * size_class_cnt]; - if (!fnt_info.pair_size_info[0]) { - fprintf(stderr, "Cube ERROR (WordSizeModel::Init): error allocating " - "memory for font pair size info\n"); - return false; - } memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt * sizeof(PairSizeInfo)); diff --git a/cube/word_unigrams.cpp b/cube/word_unigrams.cpp index b92289d8e8..052a025c90 100644 --- a/cube/word_unigrams.cpp +++ b/cube/word_unigrams.cpp @@ -76,32 +76,13 @@ WordUnigrams *WordUnigrams::Create(const string &data_file_path, // allocate memory WordUnigrams *word_unigrams_obj = new WordUnigrams(); - if (word_unigrams_obj == NULL) { - fprintf(stderr, "Cube ERROR (WordUnigrams::Create): could not create " - "word unigrams object.\n"); - return NULL; - } int full_len = str.length(); int word_cnt = str_vec.size() / 2; word_unigrams_obj->words_ = new char*[word_cnt]; word_unigrams_obj->costs_ = new int[word_cnt]; - if (word_unigrams_obj->words_ == NULL || - word_unigrams_obj->costs_ == NULL) { - fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error allocating " - "word unigram fields.\n"); - delete word_unigrams_obj; - return NULL; - } - word_unigrams_obj->words_[0] = new char[full_len]; - if (word_unigrams_obj->words_[0] == NULL) { - fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error allocating " - "word unigram fields.\n"); - delete word_unigrams_obj; - return NULL; - } // construct sorted list of words and costs word_unigrams_obj->word_cnt_ = 0; From 4789ca2ab8dd338419d7deee3abd131cc1f72f7b Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 30 Nov 2016 20:24:09 +0100 Subject: [PATCH 071/132] Simplify new operations It is not necessary to check for null pointers after new. 
Signed-off-by: Stefan Weil --- api/pdfrenderer.cpp | 4 ---- dict/trie.cpp | 1 - neural_networks/runtime/neural_net.cpp | 6 ------ neural_networks/runtime/neural_net.h | 3 --- textord/topitch.cpp | 2 -- 5 files changed, 16 deletions(-) diff --git a/api/pdfrenderer.cpp b/api/pdfrenderer.cpp index 80f211bb17..001c86ce41 100644 --- a/api/pdfrenderer.cpp +++ b/api/pdfrenderer.cpp @@ -819,10 +819,6 @@ bool TessPDFRenderer::imageToPDFObj(Pix *pix, *pdf_object_size = b1_len + colorspace_len + b2_len + cid->nbytescomp + b3_len; *pdf_object = new char[*pdf_object_size]; - if (!pdf_object) { - l_CIDataDestroy(&cid); - return false; - } char *p = *pdf_object; memcpy(p, b1, b1_len); diff --git a/dict/trie.cpp b/dict/trie.cpp index 6a7a8d1e35..ac7dd33cdc 100644 --- a/dict/trie.cpp +++ b/dict/trie.cpp @@ -276,7 +276,6 @@ bool Trie::add_word_to_dawg(const WERD_CHOICE &word, NODE_REF Trie::new_dawg_node() { TRIE_NODE_RECORD *node = new TRIE_NODE_RECORD(); - if (node == NULL) return 0; // failed to create new node nodes_.push_back(node); return nodes_.length() - 1; } diff --git a/neural_networks/runtime/neural_net.cpp b/neural_networks/runtime/neural_net.cpp index d0d359bd53..30fa4f5704 100644 --- a/neural_networks/runtime/neural_net.cpp +++ b/neural_networks/runtime/neural_net.cpp @@ -157,9 +157,6 @@ bool NeuralNet::CreateFastNet() { node->fan_in_cnt = neurons_[node_idx].fan_in_cnt(); // allocate memory for fan-in nodes node->inputs = new WeightedNode[node->fan_in_cnt]; - if (node->inputs == NULL) { - return false; - } for (int fan_in = 0; fan_in < node->fan_in_cnt; fan_in++) { // identify fan-in neuron const int id = neurons_[node_idx].fan_in(fan_in)->id(); @@ -222,9 +219,6 @@ NeuralNet *NeuralNet::FromFile(const string file_name) { NeuralNet *NeuralNet::FromInputBuffer(InputFileBuffer *ib) { // create a new net object NeuralNet *net_obj = new NeuralNet(); - if (net_obj == NULL) { - return NULL; - } // load the net if (!net_obj->ReadBinary(ib)) { delete net_obj; diff --git 
a/neural_networks/runtime/neural_net.h b/neural_networks/runtime/neural_net.h index 081a042804..ff3c35323e 100644 --- a/neural_networks/runtime/neural_net.h +++ b/neural_networks/runtime/neural_net.h @@ -140,9 +140,6 @@ class NeuralNet { } // set the size of the neurons vector neurons_ = new Neuron[neuron_cnt_]; - if (neurons_ == NULL) { - return false; - } // read & validate inputs if (input_buff->Read(&read_val, sizeof(read_val)) != sizeof(read_val)) { return false; diff --git a/textord/topitch.cpp b/textord/topitch.cpp index 3dda815777..cfde683b0b 100644 --- a/textord/topitch.cpp +++ b/textord/topitch.cpp @@ -1285,8 +1285,6 @@ float tune_row_pitch2( //find fp cells return initial_pitch; } sum_proj = new STATS[textord_pitch_range * 2 + 1]; - if (sum_proj == NULL) - return initial_pitch; for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; pitch_delta++) From 9984077798e7c67369495accbcc74920b97bf42b Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 30 Nov 2016 22:14:23 +0100 Subject: [PATCH 072/132] Change tesseract parameter -oem to --oem It was introduced recently in commit f24ef67d, so there is no need to support the old variant for compatibility reasons. 
Signed-off-by: Stefan Weil --- api/tesseractmain.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 0ac3b1f0c9..1fb429ab1b 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -142,7 +142,7 @@ void PrintHelpMessage(const char* program) { " -c VAR=VALUE Set value for config variables.\n" " Multiple -c arguments are allowed.\n" " -psm NUM Specify page segmentation mode.\n" - " -oem NUM Specify OCR Engine mode.\n" + " --oem NUM Specify OCR Engine mode.\n" "NOTE: These options must occur before any configfile.\n"; printf("\n%s\n", ocr_options); @@ -276,7 +276,7 @@ void ParseArgs(const int argc, char** argv, const char** lang, } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) { *pagesegmode = static_cast(atoi(argv[i + 1])); ++i; - } else if (strcmp(argv[i], "-oem") == 0 && i + 1 < argc) { + } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) { *enginemode = static_cast(atoi(argv[i + 1])); ++i; } else if (strcmp(argv[i], "--print-parameters") == 0) { From 6933b0618ce13ffb63cf35e405b869a9225ea148 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Wed, 30 Nov 2016 22:23:08 +0100 Subject: [PATCH 073/132] Change tesseract parameter -psm to --psm For compatibility reasons the old variant is still supported. Signed-off-by: Stefan Weil --- README.md | 2 +- api/tesseractmain.cpp | 7 ++++++- doc/tesseract.1 | 4 ++-- doc/tesseract.1.asc | 4 ++-- doc/tesseract.1.html | 4 ++-- doc/tesseract.1.xml | 4 ++-- testing/runtestset.sh | 2 +- 7 files changed, 16 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index cc9f3d69ae..30af585520 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ You can either [Install Tesseract via pre-built binary package](https://github.c Basic command line usage: - tesseract imagename outputbase [-l lang] [-psm pagesegmode] [configfiles...] + tesseract imagename outputbase [-l lang] [--psm pagesegmode] [configfiles...] 
For more information about the various command line options use `tesseract --help` or `man tesseract`. diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 1fb429ab1b..8f85ff8c98 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -141,7 +141,7 @@ void PrintHelpMessage(const char* program) { " -l LANG[+LANG] Specify language(s) used for OCR.\n" " -c VAR=VALUE Set value for config variables.\n" " Multiple -c arguments are allowed.\n" - " -psm NUM Specify page segmentation mode.\n" + " --psm NUM Specify page segmentation mode.\n" " --oem NUM Specify OCR Engine mode.\n" "NOTE: These options must occur before any configfile.\n"; @@ -274,6 +274,11 @@ void ParseArgs(const int argc, char** argv, const char** lang, noocr = true; *list_langs = true; } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) { + // The parameter -psm is deprecated and was replaced by --psm. + // It is still supported for compatibility reasons. + *pagesegmode = static_cast(atoi(argv[i + 1])); + ++i; + } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) { *pagesegmode = static_cast(atoi(argv[i + 1])); ++i; } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) { diff --git a/doc/tesseract.1 b/doc/tesseract.1 index 95128fec99..89107f0312 100644 --- a/doc/tesseract.1 +++ b/doc/tesseract.1 @@ -84,7 +84,7 @@ Set value for control parameter\&. Multiple \-c arguments are allowed\&. The language to use\&. If none is specified, English is assumed\&. Multiple languages may be specified, separated by plus characters\&. Tesseract uses 3\-character ISO 639\-2 language codes\&. (See LANGUAGES) .RE .PP -\fI\-psm N\fR +\fI\--psm N\fR .RS 4 Set Tesseract to only run a subset of layout analysis and assume a certain form of image\&. The options for \fBN\fR @@ -139,7 +139,7 @@ pdf \- Output in pdf instead of a text file\&. .RE .RE .sp -\fBNota Bene:\fR The options \fI\-l lang\fR and \fI\-psm N\fR must occur before any \fIconfigfile\fR\&. 
+\fBNota Bene:\fR The options \fI\-l lang\fR and \fI\--psm N\fR must occur before any \fIconfigfile\fR\&. .SH "SINGLE OPTIONS" .PP \fI\-v\fR diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc index 56627a9392..237299fe51 100644 --- a/doc/tesseract.1.asc +++ b/doc/tesseract.1.asc @@ -54,7 +54,7 @@ OPTIONS Multiple languages may be specified, separated by plus characters. Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES) -'-psm N':: +'--psm N':: Set Tesseract to only run a subset of layout analysis and assume a certain form of image. The options for *N* are: @@ -78,7 +78,7 @@ OPTIONS * hocr - Output in hOCR format instead of as a text file. * pdf - Output in pdf instead of a text file. -*Nota Bene:* The options '-l lang' and '-psm N' must occur +*Nota Bene:* The options '-l lang' and '--psm N' must occur before any 'configfile'. diff --git a/doc/tesseract.1.html b/doc/tesseract.1.html index 90c5dae78c..5e37d31170 100644 --- a/doc/tesseract.1.html +++ b/doc/tesseract.1.html @@ -847,7 +847,7 @@

OPTIONS

--psm N +--psm N

@@ -893,7 +893,7 @@

OPTIONS

-

Nota Bene: The options -l lang and -psm N must occur +

Nota Bene: The options -l lang and --psm N must occur before any configfile.

diff --git a/doc/tesseract.1.xml b/doc/tesseract.1.xml index 2f971caa7b..842c5acd61 100644 --- a/doc/tesseract.1.xml +++ b/doc/tesseract.1.xml @@ -130,7 +130,7 @@ at Google since then. --psm N +--psm N @@ -176,7 +176,7 @@ pdf - Output in pdf instead of a text file. -Nota Bene: The options -l lang and -psm N must occur +Nota Bene: The options -l lang and --psm N must occur before any configfile. diff --git a/testing/runtestset.sh b/testing/runtestset.sh index 0c9595f9aa..5c2a7e7aa2 100755 --- a/testing/runtestset.sh +++ b/testing/runtestset.sh @@ -64,7 +64,7 @@ do srcdir="$imdir" fi # echo "$srcdir/$page.tif" - $tess $srcdir/$page.tif $resdir/$page -psm 6 $config 2>&1 |grep -v "OCR Engine" + $tess $srcdir/$page.tif $resdir/$page --psm 6 $config 2>&1 |grep -v "OCR Engine" if [ -r times.txt ] then read t Date: Wed, 7 Dec 2016 17:17:49 +0100 Subject: [PATCH 074/132] opencl: Remove unneeded and potentially bad type casts Signed-off-by: Stefan Weil --- ccmain/thresholder.cpp | 2 +- ccstruct/otsuthr.cpp | 2 +- opencl/openclwrapper.cpp | 186 +++++++++++++++++++-------------------- opencl/openclwrapper.h | 8 +- 4 files changed, 99 insertions(+), 99 deletions(-) diff --git a/ccmain/thresholder.cpp b/ccmain/thresholder.cpp index a9a127eb3b..545ad5854e 100644 --- a/ccmain/thresholder.cpp +++ b/ccmain/thresholder.cpp @@ -265,7 +265,7 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix, OpenclDevice od; if ((num_channels == 4 || num_channels == 1) && od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0 ) { - od.ThresholdRectToPixOCL((const unsigned char*)pixGetData(src_pix), + od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, thresholds, hi_values, out_pix /*pix_OCL*/, rect_height_, rect_width_, rect_top_, rect_left_); diff --git a/ccstruct/otsuthr.cpp b/ccstruct/otsuthr.cpp index ee6c739928..8822ce2943 100644 --- a/ccstruct/otsuthr.cpp +++ b/ccstruct/otsuthr.cpp @@ -58,7 +58,7 @@ int OtsuThreshold(Pix* 
src_pix, int left, int top, int width, int height, OpenclDevice od; if (od.selectedDeviceIsOpenCL() && (num_channels == 1 || num_channels == 4) && top == 0 && left == 0) { - od.HistogramRectOCL((const unsigned char*)pixGetData(src_pix), num_channels, + od.HistogramRectOCL((unsigned char*)pixGetData(src_pix), num_channels, pixGetWpl(src_pix) * 4, left, top, width, height, kHistogramSize, histogramAllChannels); diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index 6554bc7035..bcfe7ddacf 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -128,10 +128,10 @@ void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) { gpuInfo->mpDevID = device; gpuInfo->mpArryDevsID = new cl_device_id[1]; gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID; - clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE , sizeof(cl_device_type), (void *) &gpuInfo->mDevType , &size); + clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE , sizeof(cl_device_type), &gpuInfo->mDevType , &size); CHECK_OPENCL( clStatus, "populateGPUEnv::getDeviceInfo(TYPE)"); // platform - clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM , sizeof(cl_platform_id), (void *) &gpuInfo->mpPlatformID , &size); + clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM , sizeof(cl_platform_id), &gpuInfo->mpPlatformID , &size); CHECK_OPENCL( clStatus, "populateGPUEnv::getDeviceInfo(PLATFORM)"); // context cl_context_properties props[3]; @@ -773,15 +773,15 @@ PERF_COUNT_START("pixReadFromTiffKernel") outputCl = allocateZeroCopyBuffer(rEnv, pResult, w*h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus); //Kernel arguments - clStatus = clSetKernelArg( rEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&valuesCl ); + clStatus = clSetKernelArg( rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl ); CHECK_OPENCL( clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg( rEnv.mpkKernel, 1, sizeof(w), (void *)&w ); + clStatus = clSetKernelArg( rEnv.mpkKernel, 1, 
sizeof(w), &w ); CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 2, sizeof(h), (void *)&h ); + clStatus = clSetKernelArg( rEnv.mpkKernel, 2, sizeof(h), &h ); CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 3, sizeof(wpl), (void *)&wpl ); + clStatus = clSetKernelArg( rEnv.mpkKernel, 3, sizeof(wpl), &wpl ); CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 4, sizeof(cl_mem), (void *)&outputCl ); + clStatus = clSetKernelArg( rEnv.mpkKernel, 4, sizeof(cl_mem), &outputCl ); CHECK_OPENCL( clStatus, "clSetKernelArg"); //Kernel enqueue @@ -1402,11 +1402,11 @@ pixDilateCL_55(l_int32 wpl, l_int32 h) status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), - (const void *)&wpl); + &wpl); status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), - (const void *)&h); + &h); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, @@ -1444,11 +1444,11 @@ pixDilateCL_55(l_int32 wpl, l_int32 h) status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), - (const void *)&wpl); + &wpl); status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), - (const void *)&h); + &h); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, @@ -1496,11 +1496,11 @@ pixErodeCL_55(l_int32 wpl, l_int32 h) status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), - (const void *)&wpl); + &wpl); status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), - (const void *)&h); + &h); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, @@ -1538,19 +1538,19 @@ pixErodeCL_55(l_int32 wpl, l_int32 h) status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), - (const void *)&wpl); + &wpl); status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), - (const void *)&h); + &h); status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(fwmask), - (const void *)&fwmask); + &fwmask); status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(lwmask), - (const void *)&lwmask); + &lwmask); status = 
clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, @@ -1606,11 +1606,11 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), (const void *)&xn); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn); status = - clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), (const void *)&h); + clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL); @@ -1630,12 +1630,12 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); status = - clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), (const void *)&h); + clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), - (const void *)&isEven); + &isEven); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL); @@ -1662,19 +1662,19 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), - (const void 
*)&yp); + &yp); status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), - (const void *)&wpl); + &wpl); status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), - (const void *)&h); + &h); status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(yn), - (const void *)&yn); + &yn); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, @@ -1732,16 +1732,16 @@ cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), (const void *)&xn); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h); status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(isAsymmetric), - (const void *)&isAsymmetric); + &isAsymmetric); status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(rwmask), - (const void *)&rwmask); + &rwmask); status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(lwmask), - (const void *)&lwmask); + &lwmask); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL); @@ -1757,17 +1757,17 @@ cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), (const void *)&xp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), (const void *)&wpl); - status = 
clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), - (const void *)&isAsymmetric); + &isAsymmetric); status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(rwmask), - (const void *)&rwmask); + &rwmask); status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(lwmask), - (const void *)&lwmask); + &lwmask); status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(isEven), - (const void *)&isEven); + &isEven); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL); @@ -1784,12 +1784,12 @@ cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), (const void *)&yp); - status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), (const void *)&wpl); - status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), (const void *)&h); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), - (const void *)&isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(yn), (const void *)&yn); + &isAsymmetric); + status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(yn), &yn); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL); } @@ -1942,11 +1942,11 @@ pixORCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem ou status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), - (const 
void *)&wpl); + &wpl); status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), - (const void *)&h); + &h); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, @@ -1992,11 +1992,11 @@ pixANDCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem o status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), - (const void *)&wpl); + &wpl); status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), - (const void *)&h); + &h); status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, @@ -2045,17 +2045,17 @@ pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_ status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), - (const void *)&wpl); + &wpl); status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), - (const void *)&h); + &h); if (outBuffer != NULL) { status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), - (const void *)&outBuffer); + &outBuffer); } status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, @@ -2228,7 +2228,7 @@ void OpenclDevice::pixGetLinesCL(PIX *pixd, PIX *pixs, PIX **pix_vline, * histogramAllChannels is laid out as all channel 0, then all channel 1... 
* only supports 1 or 4 channels (bytes_per_pixel) ************************************************************************/ -int OpenclDevice::HistogramRectOCL(const unsigned char *imageData, +int OpenclDevice::HistogramRectOCL(unsigned char *imageData, int bytes_per_pixel, int bytes_per_line, int left, // always 0 int top, // always 0 @@ -2248,7 +2248,7 @@ int OpenclDevice::HistogramRectOCL(const unsigned char *imageData, // using a garlic bus memory type cl_mem imageBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - width * height * bytes_per_pixel * sizeof(char), (void *)imageData, + width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); @@ -2273,7 +2273,7 @@ int OpenclDevice::HistogramRectOCL(const unsigned char *imageData, cl_mem histogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, kHistogramSize * bytes_per_pixel * sizeof(int), - (void *)histogramAllChannels, &clStatus); + histogramAllChannels, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer histogramBuffer"); /* intermediate histogram buffer */ @@ -2290,7 +2290,7 @@ int OpenclDevice::HistogramRectOCL(const unsigned char *imageData, zeroBuffer[0] = 0; cl_mem atomicSyncBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - sizeof(cl_int), (void *)zeroBuffer, &clStatus); + sizeof(cl_int), zeroBuffer, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer atomicSyncBuffer"); delete[] zeroBuffer; // Create kernel objects based on bytes_per_pixel @@ -2322,21 +2322,21 @@ int OpenclDevice::HistogramRectOCL(const unsigned char *imageData, clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0, NULL, NULL); /* set kernel 1 arguments */ - clStatus = clSetKernelArg( histKern.mpkKernel, 0, sizeof(cl_mem), (void *)&imageBuffer ); + clStatus = clSetKernelArg( histKern.mpkKernel, 0, sizeof(cl_mem), &imageBuffer ); 
CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer"); cl_uint numPixels = width*height; - clStatus = clSetKernelArg( histKern.mpkKernel, 1, sizeof(cl_uint), (void *)&numPixels ); + clStatus = clSetKernelArg( histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels ); CHECK_OPENCL( clStatus, "clSetKernelArg numPixels" ); - clStatus = clSetKernelArg( histKern.mpkKernel, 2, sizeof(cl_mem), (void *)&tmpHistogramBuffer ); + clStatus = clSetKernelArg( histKern.mpkKernel, 2, sizeof(cl_mem), &tmpHistogramBuffer ); CHECK_OPENCL( clStatus, "clSetKernelArg tmpHistogramBuffer"); /* set kernel 2 arguments */ int n = numThreads/bytes_per_pixel; - clStatus = clSetKernelArg( histRedKern.mpkKernel, 0, sizeof(cl_int), (void *)&n ); + clStatus = clSetKernelArg( histRedKern.mpkKernel, 0, sizeof(cl_int), &n ); CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer"); - clStatus = clSetKernelArg( histRedKern.mpkKernel, 1, sizeof(cl_mem), (void *)&tmpHistogramBuffer ); + clStatus = clSetKernelArg( histRedKern.mpkKernel, 1, sizeof(cl_mem), &tmpHistogramBuffer ); CHECK_OPENCL( clStatus, "clSetKernelArg tmpHistogramBuffer"); - clStatus = clSetKernelArg( histRedKern.mpkKernel, 2, sizeof(cl_mem), (void *)&histogramBuffer ); + clStatus = clSetKernelArg( histRedKern.mpkKernel, 2, sizeof(cl_mem), &histogramBuffer ); CHECK_OPENCL( clStatus, "clSetKernelArg histogramBuffer"); /* launch histogram */ @@ -2384,10 +2384,10 @@ return retVal; * from the class, using thresholds/hi_values to the output IMAGE. 
* only supports 1 or 4 channels ************************************************************************/ -int OpenclDevice::ThresholdRectToPixOCL(const unsigned char *imageData, +int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, int bytes_per_pixel, int bytes_per_line, - const int *thresholds, - const int *hi_values, Pix **pix, + int *thresholds, + int *hi_values, Pix **pix, int height, int width, int top, int left) { PERF_COUNT_START("ThresholdRectToPixOCL") @@ -2423,23 +2423,23 @@ int OpenclDevice::ThresholdRectToPixOCL(const unsigned char *imageData, cl_mem imageBuffer = clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, width * height * bytes_per_pixel * sizeof(char), - (void *)imageData, &clStatus); + imageData, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); /* map pix as write only */ pixThBuffer = clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, - pixSize, (void *)pixData, &clStatus); + pixSize, pixData, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer pix"); /* map thresholds and hi_values */ cl_mem thresholdsBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - bytes_per_pixel * sizeof(int), (void *)thresholds, &clStatus); + bytes_per_pixel * sizeof(int), thresholds, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer thresholdBuffer"); cl_mem hiValuesBuffer = clCreateBuffer( rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - bytes_per_pixel * sizeof(int), (void *)hi_values, &clStatus); + bytes_per_pixel * sizeof(int), hi_values, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer hiValuesBuffer"); /* compile kernel */ @@ -2455,23 +2455,23 @@ int OpenclDevice::ThresholdRectToPixOCL(const unsigned char *imageData, /* set kernel arguments */ clStatus = - clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&imageBuffer); + clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &imageBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg 
imageBuffer"); cl_uint numPixels = width * height; - clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(int), (void *)&height); + clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(int), &height); CHECK_OPENCL(clStatus, "clSetKernelArg height"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(int), (void *)&width); + clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(int), &width); CHECK_OPENCL(clStatus, "clSetKernelArg width"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(int), (void *)&wpl); + clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(int), &wpl); CHECK_OPENCL(clStatus, "clSetKernelArg wpl"); clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), - (void *)&thresholdsBuffer); + &thresholdsBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg thresholdsBuffer"); clStatus = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(cl_mem), - (void *)&hiValuesBuffer); + &hiValuesBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg hiValuesBuffer"); clStatus = - clSetKernelArg(rEnv.mpkKernel, 6, sizeof(cl_mem), (void *)&pixThBuffer); + clSetKernelArg(rEnv.mpkKernel, 6, sizeof(cl_mem), &pixThBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg pixThBuffer"); /* launch kernel & wait */ @@ -3000,7 +3000,7 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in // encode score object as byte string ds_status serializeScore( ds_device* device, void **serializedScore, unsigned int* serializedScoreSize ) { *serializedScoreSize = sizeof(TessDeviceScore); - *serializedScore = (void *) new unsigned char[*serializedScoreSize]; + *serializedScore = new unsigned char[*serializedScoreSize]; memcpy(*serializedScore, device->score, *serializedScoreSize); return DS_SUCCESS; } @@ -3061,7 +3061,7 @@ ds_status evaluateScoreForDevice( ds_device *device, void *inputData) { histogramRectWeight * histogramRectTime + thresholdRectToPixWeight * thresholdRectToPixTime + getLineMasksMorphWeight * getLineMasksMorphTime; - device->score = (void *)new TessDeviceScore; 
+ device->score = new TessDeviceScore; ((TessDeviceScore *)device->score)->time = weightedTime; printf("[DS] Device: \"%s\" (%s) evaluated\n", device->oclDeviceName, device->type==DS_DEVICE_OPENCL_DEVICE ? "OpenCL" : "Native" ); @@ -3102,7 +3102,7 @@ ds_device OpenclDevice::getDeviceSelection( ) { unsigned int numUpdates; status = profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, - (void *)&input, &numUpdates); + &input, &numUpdates); PERF_COUNT_SUB("profileDevices") // write scores to file if (status == DS_SUCCESS) { @@ -3266,13 +3266,13 @@ Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix, // 32-bit source // source buffer cl_mem srcBuffer = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - srcSize, (void *)srcData, &clStatus); + srcSize, srcData, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer srcBuffer"); // destination buffer cl_mem dstBuffer = clCreateBuffer(kEnv.mpkContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, - dstSize, (void *)dstData, &clStatus); + dstSize, dstData, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer dstBuffer"); // setup work group size parameters @@ -3290,24 +3290,24 @@ Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix, // 32-bit source /* set kernel arguments */ clStatus = - clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), (void *)&srcBuffer); + clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), &srcBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg srcBuffer"); clStatus = - clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), (void *)&dstBuffer); + clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), &dstBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg dstBuffer"); - clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(int), (void *)&srcWPL); + clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(int), &srcWPL); CHECK_OPENCL(clStatus, "clSetKernelArg srcWPL"); - clStatus = clSetKernelArg(kEnv.mpkKernel, 3, sizeof(int), (void *)&dstWPL); + clStatus = clSetKernelArg(kEnv.mpkKernel, 3, 
sizeof(int), &dstWPL); CHECK_OPENCL(clStatus, "clSetKernelArg dstWPL"); - clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(int), (void *)&h); + clStatus = clSetKernelArg(kEnv.mpkKernel, 4, sizeof(int), &h); CHECK_OPENCL(clStatus, "clSetKernelArg height"); - clStatus = clSetKernelArg(kEnv.mpkKernel, 5, sizeof(int), (void *)&w); + clStatus = clSetKernelArg(kEnv.mpkKernel, 5, sizeof(int), &w); CHECK_OPENCL(clStatus, "clSetKernelArg width"); - clStatus = clSetKernelArg(kEnv.mpkKernel, 6, sizeof(float), (void *)&rwt); + clStatus = clSetKernelArg(kEnv.mpkKernel, 6, sizeof(float), &rwt); CHECK_OPENCL(clStatus, "clSetKernelArg rwt"); - clStatus = clSetKernelArg(kEnv.mpkKernel, 7, sizeof(float), (void *)&gwt); + clStatus = clSetKernelArg(kEnv.mpkKernel, 7, sizeof(float), &gwt); CHECK_OPENCL(clStatus, "clSetKernelArg gwt"); - clStatus = clSetKernelArg(kEnv.mpkKernel, 8, sizeof(float), (void *)&bwt); + clStatus = clSetKernelArg(kEnv.mpkKernel, 8, sizeof(float), &bwt); CHECK_OPENCL(clStatus, "clSetKernelArg bwt"); /* launch kernel & wait */ diff --git a/opencl/openclwrapper.h b/opencl/openclwrapper.h index cfd612aa98..0fbaf89d7d 100644 --- a/opencl/openclwrapper.h +++ b/opencl/openclwrapper.h @@ -298,15 +298,15 @@ class OpenclDevice inline static int AddKernelConfig( int kCount, const char *kName ); /* for binarization */ - static int HistogramRectOCL(const unsigned char *imagedata, + static int HistogramRectOCL(unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, int left, int top, int width, int height, int kHistogramSize, int *histogramAllChannels); - static int ThresholdRectToPixOCL(const unsigned char *imagedata, + static int ThresholdRectToPixOCL(unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, - const int *thresholds, - const int *hi_values, Pix **pix, + int *thresholds, + int *hi_values, Pix **pix, int rect_height, int rect_width, int rect_top, int rect_left); From 98d50f6c16122f4f26a092acef316da1e8d4afe2 Mon Sep 17 00:00:00 2001 
From: Stefan Weil Date: Fri, 2 Dec 2016 22:17:41 +0100 Subject: [PATCH 075/132] cube: Fix use after free regression Coverity report: CID 1366758: Memory - corruptions (USE_AFTER_FREE) Calling "operator delete[]" frees pointer "label32" which has already been freed. Commit f60ff4d57560b2daf03951015dd188446ce50024 fixed several memory leaks but also added this wrong delete operation. label32 is assigned to char_samp->label32_, so it is freed when deleting char_samp. Signed-off-by: Stefan Weil --- cube/char_samp.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cube/char_samp.cpp b/cube/char_samp.cpp index 4b9887e4e4..f55735be0e 100644 --- a/cube/char_samp.cpp +++ b/cube/char_samp.cpp @@ -166,7 +166,6 @@ CharSamp *CharSamp::FromCharDumpFile(CachedFile *fp) { // load the Bmp8 part if (char_samp->LoadFromCharDumpFile(fp) == false) { delete char_samp; - delete [] label32; return NULL; } return char_samp; From 8a02c4f2e90d3374eb9407279bd171c53934cc76 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 2 Dec 2016 22:25:00 +0100 Subject: [PATCH 076/132] cube: Fix coverity warning caused by unneeded null pointer check Commit 03eec61a2f0bbcbc0b1b612fd4693ba70ce66678 removed unneeded null pointer checks after new, but missed one which now raises a warning from coverity scan. Remove that one, too. 
Signed-off-by: Stefan Weil --- ccmain/cube_control.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ccmain/cube_control.cpp b/ccmain/cube_control.cpp index 8270e4f350..50f7512dd1 100644 --- a/ccmain/cube_control.cpp +++ b/ccmain/cube_control.cpp @@ -182,10 +182,8 @@ bool Tesseract::init_cube_objects(bool load_combiner, if (!tess_cube_combiner_->LoadCombinerNet()) { delete cube_cntxt_; cube_cntxt_ = NULL; - if (tess_cube_combiner_ != NULL) { - delete tess_cube_combiner_; - tess_cube_combiner_ = NULL; - } + delete tess_cube_combiner_; + tess_cube_combiner_ = NULL; if (cube_debug_level > 0) tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n"); return false; From d009c4fc8c93cdf661e97a84b4cec4ee58c61ca9 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 3 Dec 2016 22:34:02 +0100 Subject: [PATCH 077/132] opencl: Add missing checks for OpenCL failures Fix also text for an existing check. Signed-off-by: Stefan Weil --- opencl/openclwrapper.cpp | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index bcfe7ddacf..1361299b77 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -766,7 +766,7 @@ PERF_COUNT_START("pixReadFromTiffKernel") l_uint32 *pResult = (l_uint32 *)malloc(w*h * sizeof(l_uint32)); rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "composeRGBPixel", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel"); + CHECK_OPENCL( clStatus, "clCreateKernel composeRGBPixel"); //Allocate input and output OCL buffers valuesCl = allocateZeroCopyBuffer(rEnv, tiffdata, w*h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus); @@ -1390,6 +1390,7 @@ pixDilateCL_55(l_int32 wpl, l_int32 h) localThreads[1] = GROUPSIZE_HMORY; rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateHor_5x5", &status ); + CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_5x5"); status = clSetKernelArg(rEnv.mpkKernel, 0, @@ -1432,6 +1433,7 @@ 
pixDilateCL_55(l_int32 wpl, l_int32 h) localThreads[1] = GROUPSIZE_Y; rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateVer_5x5", &status ); + CHECK_OPENCL(status, "clCreateKernel morphoDilateVer_5x5"); status = clSetKernelArg(rEnv.mpkKernel, 0, @@ -1484,6 +1486,7 @@ pixErodeCL_55(l_int32 wpl, l_int32 h) localThreads[1] = GROUPSIZE_HMORY; rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeHor_5x5", &status ); + CHECK_OPENCL(status, "clCreateKernel morphoErodeHor_5x5"); status = clSetKernelArg(rEnv.mpkKernel, 0, @@ -1526,6 +1529,7 @@ pixErodeCL_55(l_int32 wpl, l_int32 h) localThreads[1] = GROUPSIZE_Y; rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoErodeVer_5x5", &status ); + CHECK_OPENCL(status, "clCreateKernel morphoErodeVer_5x5"); status = clSetKernelArg(rEnv.mpkKernel, 0, @@ -1603,6 +1607,7 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) // Generic case. rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoDilateHor", &status); + CHECK_OPENCL(status, "clCreateKernel morphoDilateHor"); status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); @@ -1626,6 +1631,7 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) // Specific Horizontal pass kernel for half width < 32 rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoDilateHor_32word", &status); + CHECK_OPENCL(status, "clCreateKernel morphoDilateHor_32word"); isEven = (xp != xn); status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); @@ -1644,12 +1650,13 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) pixtemp = pixsCLBuffer; pixsCLBuffer = pixdCLBuffer; pixdCLBuffer = pixtemp; - } + } } if (yp > 0 || yn > 0) { rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "morphoDilateVer", &status ); + CHECK_OPENCL(status, "clCreateKernel morphoDilateVer"); status = clSetKernelArg(rEnv.mpkKernel, 0, @@ -1781,6 +1788,7 
@@ cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { // Vertical Pass if (yp > 0 || yn > 0) { rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "morphoErodeVer", &status); + CHECK_OPENCL(status, "clCreateKernel morphoErodeVer"); status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); @@ -1926,6 +1934,7 @@ pixORCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem ou globalThreads[1] = gsize; rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixOR", &status ); + CHECK_OPENCL(status, "clCreateKernel pixOR"); status = clSetKernelArg(rEnv.mpkKernel, 0, @@ -1975,6 +1984,7 @@ pixANDCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem o globalThreads[1] = gsize; rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixAND", &status ); + CHECK_OPENCL(status, "clCreateKernel pixAND"); // Enqueue a kernel run call. status = clSetKernelArg(rEnv.mpkKernel, @@ -2027,10 +2037,12 @@ pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_ if (outBuffer != NULL) { rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixSubtract", &status ); + CHECK_OPENCL(status, "clCreateKernel pixSubtract"); } else { rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixSubtract_inplace", &status ); + CHECK_OPENCL(status, "clCreateKernel pixSubtract_inplace"); } // Enqueue a kernel run call. From 4535d24d13ba7c4038a5194453333326409114bd Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 4 Dec 2016 12:13:43 +0100 Subject: [PATCH 078/132] Remove extra semicolons after member function definitions clang++ report: api/baseapi.h:852:4: warning: extra ';' after member function definition [-Wextra-semi] [...] 
Signed-off-by: Stefan Weil --- api/baseapi.h | 2 +- ccstruct/params_training_featdef.h | 2 +- ccutil/bits16.h | 8 ++++---- ccutil/strngs.h | 4 ++-- cube/beam_search.h | 2 +- dict/dawg.h | 2 +- textord/blkocc.h | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/api/baseapi.h b/api/baseapi.h index a4f88b67fa..9c98112ec6 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -849,7 +849,7 @@ class TESS_API TessBaseAPI { TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; - }; + } /* @} */ diff --git a/ccstruct/params_training_featdef.h b/ccstruct/params_training_featdef.h index ff76480be0..6e021f0b30 100644 --- a/ccstruct/params_training_featdef.h +++ b/ccstruct/params_training_featdef.h @@ -126,7 +126,7 @@ typedef GenericVector ParamsTrainingHypothesisList; // explored on PASS1, PASS2, fix xheight pass, etc). class ParamsTrainingBundle { public: - ParamsTrainingBundle() {}; + ParamsTrainingBundle() {} // Starts a new hypothesis list. // Should be called at the beginning of a new run of the segmentation search. 
void StartHypothesisList() { diff --git a/ccutil/bits16.h b/ccutil/bits16.h index 352b48bee2..6bbec4c0f1 100644 --- a/ccutil/bits16.h +++ b/ccutil/bits16.h @@ -37,12 +37,12 @@ class DLLSYM BITS16 void turn_on_bit( // flip specified bit uinT8 bit_num) { // bit to flip 0..7 val = val | 01 << bit_num; - }; + } void turn_off_bit( // flip specified bit uinT8 bit_num) { // bit to flip 0..7 val = val & ~(01 << bit_num); - }; + } void set_bit( // flip specified bit uinT8 bit_num, // bit to flip 0..7 @@ -51,11 +51,11 @@ class DLLSYM BITS16 val = val | 01 << bit_num; else val = val & ~(01 << bit_num); - }; + } BOOL8 bit( // access bit uinT8 bit_num) const { // bit to access return (val >> bit_num) & 01; - }; + } }; #endif diff --git a/ccutil/strngs.h b/ccutil/strngs.h index 1fe42b6076..ea1738895c 100644 --- a/ccutil/strngs.h +++ b/ccutil/strngs.h @@ -149,11 +149,11 @@ class TESS_API STRING // returns the string data part of storage inline char* GetCStr() { return ((char *)data_) + sizeof(STRING_HEADER); - }; + } inline const char* GetCStr() const { return ((const char *)data_) + sizeof(STRING_HEADER); - }; + } inline bool InvariantOk() const { #if STRING_IS_PROTECTED return (GetHeader()->used_ == 0) ? diff --git a/cube/beam_search.h b/cube/beam_search.h index cd8fc0110d..31659a7fef 100644 --- a/cube/beam_search.h +++ b/cube/beam_search.h @@ -80,7 +80,7 @@ class BeamSearch { // best-cost path before the alternates list is sorted. inline int BestPresortedNodeIndex() const { return best_presorted_node_idx_; - }; + } private: // Maximum reasonable segmentation point count diff --git a/dict/dawg.h b/dict/dawg.h index f040353118..c7169167d8 100644 --- a/dict/dawg.h +++ b/dict/dawg.h @@ -128,7 +128,7 @@ class Dawg { inline const STRING &lang() const { return lang_; } inline PermuterType permuter() const { return perm_; } - virtual ~Dawg() {}; + virtual ~Dawg() {} /// Returns true if the given word is in the Dawg. 
bool word_in_dawg(const WERD_CHOICE &word) const; diff --git a/textord/blkocc.h b/textord/blkocc.h index 8305c36cdb..d80afe25f3 100644 --- a/textord/blkocc.h +++ b/textord/blkocc.h @@ -53,7 +53,7 @@ class REGION_OCC:public ELIST_LINK inT16 region_type; //Type of crossing REGION_OCC() { - }; //constructor used + } //constructor used //only in COPIER etc REGION_OCC( //constructor float min, From 2298bdddc5d0df548978de1f5919ec254f894d79 Mon Sep 17 00:00:00 2001 From: Ray Smith Date: Tue, 6 Dec 2016 13:40:57 -0800 Subject: [PATCH 079/132] Fixed damage to binary images when processing PDFs, issue #535 --- ccmain/thresholder.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ccmain/thresholder.cpp b/ccmain/thresholder.cpp index 545ad5854e..fc8111ef7b 100644 --- a/ccmain/thresholder.cpp +++ b/ccmain/thresholder.cpp @@ -181,8 +181,11 @@ void ImageThresholder::SetImage(const Pix* pix) { // Caller must use pixDestroy to free the created Pix. void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) { if (pix_channels_ == 0) { - // We have a binary image, so it just has to be cloned. - *pix = GetPixRect(); + // We have a binary image, but it still has to be copied, as this API + // allows the caller to modify the output. + Pix* original = GetPixRect(); + *pix = pixCopy(nullptr, original); + pixDestroy(&original); } else { OtsuThresholdRectToPix(pix_, pix); } From ddef7d4a7c1e6d1f2c67266a327f3d15d9b47446 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 15 May 2016 19:08:10 +0200 Subject: [PATCH 080/132] training: Update Makefile for current Mingw-w64 Mingw-w64 no longer needs special linker options, builds with those options fail. 
Signed-off-by: Stefan Weil --- training/Makefile.am | 7 ------- 1 file changed, 7 deletions(-) diff --git a/training/Makefile.am b/training/Makefile.am index fe3d85bcdc..2506226654 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -10,14 +10,7 @@ AM_CPPFLAGS += \ EXTRA_DIST = language-specific.sh tesstrain.sh tesstrain_utils.sh -if MINGW -# try static build -#AM_LDFLAGS += -all-static -#libic=-lsicuin -licudt -lsicuuc -libicu=-licuin -licuuc -else libicu=-licui18n -licuuc -endif # TODO: training programs can not be linked to shared library created # with -fvisibility if VISIBILITY From e34ab8c3d568f00bac4b2c1387353c998605635f Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 4 Dec 2016 20:41:37 +0100 Subject: [PATCH 081/132] doc: Fix line endings Remove spaces at line endings and replace CRLF by LF. Signed-off-by: Stefan Weil --- doc/ambiguous_words.1.html | 1580 ++++++++++----------- doc/ambiguous_words.1.xml | 86 +- doc/cntraining.1.html | 1610 ++++++++++----------- doc/cntraining.1.xml | 116 +- doc/combine_tessdata.1.asc | 2 +- doc/combine_tessdata.1.html | 2028 +++++++++++++-------------- doc/combine_tessdata.1.xml | 562 ++++---- doc/dawg2wordlist.1.html | 1604 ++++++++++----------- doc/dawg2wordlist.1.xml | 106 +- doc/mftraining.1.asc | 4 +- doc/mftraining.1.html | 1694 +++++++++++----------- doc/mftraining.1.xml | 204 +-- doc/shapeclustering.1.asc | 2 +- doc/shapeclustering.1.html | 1700 +++++++++++----------- doc/shapeclustering.1.xml | 210 +-- doc/tesseract.1.asc | 24 +- doc/tesseract.1.html | 2326 +++++++++++++++---------------- doc/tesseract.1.xml | 848 +++++------ doc/unicharambigs.5.asc | 2 +- doc/unicharambigs.5.html | 1750 +++++++++++------------ doc/unicharambigs.5.xml | 252 ++-- doc/unicharset.5.html | 1930 ++++++++++++------------- doc/unicharset.5.xml | 438 +++--- doc/unicharset_extractor.1.asc | 24 +- doc/unicharset_extractor.1.html | 1630 +++++++++++----------- doc/unicharset_extractor.1.xml | 126 +- doc/wordlist2dawg.1.html | 
1640 +++++++++++----------- doc/wordlist2dawg.1.xml | 138 +- 28 files changed, 11318 insertions(+), 11318 deletions(-) diff --git a/doc/ambiguous_words.1.html b/doc/ambiguous_words.1.html index 3fd5f7f1f6..be74b62d0d 100644 --- a/doc/ambiguous_words.1.html +++ b/doc/ambiguous_words.1.html @@ -1,790 +1,790 @@ - - - - - -AMBIGUOUS_WORDS(1) - - - - - -
-
-

SYNOPSIS

-
-

ambiguous_words [-l lang] TESSDATADIR WORDLIST AMBIGUOUSFILE

-
-
-
-

DESCRIPTION

-
-

ambiguous_words(1) runs Tesseract in a special mode, and for each word -in word list, produces a set of words which Tesseract thinks might be -ambiguous with it. TESSDATADIR must be set to the absolute path of -a directory containing tessdata/lang.traineddata.

-
-
-
-

SEE ALSO

-
-

tesseract(1)

-
-
-
-

COPYING

-
-

Copyright (C) 2012 Google, Inc. -Licensed under the Apache License, Version 2.0

-
-
-
-

AUTHOR

-
-

The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present).

-
-
-
-

- - - + + + + + +AMBIGUOUS_WORDS(1) + + + + + +
+
+

SYNOPSIS

+
+

ambiguous_words [-l lang] TESSDATADIR WORDLIST AMBIGUOUSFILE

+
+
+
+

DESCRIPTION

+
+

ambiguous_words(1) runs Tesseract in a special mode, and for each word +in word list, produces a set of words which Tesseract thinks might be +ambiguous with it. TESSDATADIR must be set to the absolute path of +a directory containing tessdata/lang.traineddata.

+
+
+
+

SEE ALSO

+
+

tesseract(1)

+
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/doc/ambiguous_words.1.xml b/doc/ambiguous_words.1.xml index 6293866ceb..4900c6eb93 100644 --- a/doc/ambiguous_words.1.xml +++ b/doc/ambiguous_words.1.xml @@ -1,43 +1,43 @@ - - - - - - - AMBIGUOUS_WORDS(1) - - -ambiguous_words -1 -  -  - - - ambiguous_words - generate sets of words Tesseract is likely to find ambiguous - - -ambiguous_words [-l lang] TESSDATADIR WORDLIST AMBIGUOUSFILE - - -DESCRIPTION -ambiguous_words(1) runs Tesseract in a special mode, and for each word -in word list, produces a set of words which Tesseract thinks might be -ambiguous with it. TESSDATADIR must be set to the absolute path of -a directory containing tessdata/lang.traineddata. - - -SEE ALSO -tesseract(1) - - -COPYING -Copyright (C) 2012 Google, Inc. -Licensed under the Apache License, Version 2.0 - - -AUTHOR -The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present). - - + + + + + + + AMBIGUOUS_WORDS(1) + + +ambiguous_words +1 +  +  + + + ambiguous_words + generate sets of words Tesseract is likely to find ambiguous + + +ambiguous_words [-l lang] TESSDATADIR WORDLIST AMBIGUOUSFILE + + +DESCRIPTION +ambiguous_words(1) runs Tesseract in a special mode, and for each word +in word list, produces a set of words which Tesseract thinks might be +ambiguous with it. TESSDATADIR must be set to the absolute path of +a directory containing tessdata/lang.traineddata. + + +SEE ALSO +tesseract(1) + + +COPYING +Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0 + + +AUTHOR +The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present). + + diff --git a/doc/cntraining.1.html b/doc/cntraining.1.html index 706d3bd0f4..7653061e1e 100644 --- a/doc/cntraining.1.html +++ b/doc/cntraining.1.html @@ -1,805 +1,805 @@ - - - - - -CNTRAINING(1) - - - - - -
-
-

SYNOPSIS

-
-

cntraining [-D dir] FILE

-
-
-
-

DESCRIPTION

-
-

cntraining takes a list of .tr files, from which it generates the -normproto data file (the character normalization sensitivity -prototypes).

-
-
-
-

OPTIONS

-
-
-
--D dir -
-
-

- Directory to write output files to. -

-
-
-
-
-
-

SEE ALSO

-
-

tesseract(1), shapeclustering(1), mftraining(1)

- -
-
-
-

COPYING

-
-

Copyright (c) Hewlett-Packard Company, 1988 -Licensed under the Apache License, Version 2.0

-
-
-
-

AUTHOR

-
-

The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present).

-
-
-
-

- - - + + + + + +CNTRAINING(1) + + + + + +
+
+

SYNOPSIS

+
+

cntraining [-D dir] FILE

+
+
+
+

DESCRIPTION

+
+

cntraining takes a list of .tr files, from which it generates the +normproto data file (the character normalization sensitivity +prototypes).

+
+
+
+

OPTIONS

+
+
+
+-D dir +
+
+

+ Directory to write output files to. +

+
+
+
+
+
+

SEE ALSO

+
+

tesseract(1), shapeclustering(1), mftraining(1)

+ +
+
+
+

COPYING

+
+

Copyright (c) Hewlett-Packard Company, 1988 +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/doc/cntraining.1.xml b/doc/cntraining.1.xml index 6795f12f2c..6efc99be1d 100644 --- a/doc/cntraining.1.xml +++ b/doc/cntraining.1.xml @@ -1,58 +1,58 @@ - - - - - - - CNTRAINING(1) - - -cntraining -1 -  -  - - - cntraining - character normalization training for Tesseract - - -cntraining [-D dir] FILE - - -DESCRIPTION -cntraining takes a list of .tr files, from which it generates the -normproto data file (the character normalization sensitivity -prototypes). - - -OPTIONS - - - --D dir - - - - Directory to write output files to. - - - - - - -SEE ALSO -tesseract(1), shapeclustering(1), mftraining(1) -https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract - - -COPYING -Copyright (c) Hewlett-Packard Company, 1988 -Licensed under the Apache License, Version 2.0 - - -AUTHOR -The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present). - - + + + + + + + CNTRAINING(1) + + +cntraining +1 +  +  + + + cntraining + character normalization training for Tesseract + + +cntraining [-D dir] FILE + + +DESCRIPTION +cntraining takes a list of .tr files, from which it generates the +normproto data file (the character normalization sensitivity +prototypes). + + +OPTIONS + + + +-D dir + + + + Directory to write output files to. + + + + + + +SEE ALSO +tesseract(1), shapeclustering(1), mftraining(1) +https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract + + +COPYING +Copyright (c) Hewlett-Packard Company, 1988 +Licensed under the Apache License, Version 2.0 + + +AUTHOR +The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present). 
+ + diff --git a/doc/combine_tessdata.1.asc b/doc/combine_tessdata.1.asc index d93de7ea0f..7b5295f227 100644 --- a/doc/combine_tessdata.1.asc +++ b/doc/combine_tessdata.1.asc @@ -11,7 +11,7 @@ SYNOPSIS DESCRIPTION ----------- -combine_tessdata(1) is the main program to combine/extract/overwrite +combine_tessdata(1) is the main program to combine/extract/overwrite tessdata components in [lang].traineddata files. To combine all the individual tessdata components (unicharset, DAWGs, diff --git a/doc/combine_tessdata.1.html b/doc/combine_tessdata.1.html index 8de474b33b..a7f699f939 100644 --- a/doc/combine_tessdata.1.html +++ b/doc/combine_tessdata.1.html @@ -1,1014 +1,1014 @@ - - - - - -COMBINE_TESSDATA(1) - - - - - -
-
-

SYNOPSIS

-
-

combine_tessdata [OPTION] FILE

-
-
-
-

DESCRIPTION

-
-

combine_tessdata(1) is the main program to combine/extract/overwrite -tessdata components in [lang].traineddata files.

-

To combine all the individual tessdata components (unicharset, DAWGs, -classifier templates, ambiguities, language configs) located at, say, -/home/$USER/temp/eng.* run:

-
-
-
combine_tessdata /home/$USER/temp/eng.
-
-

The result will be a combined tessdata file /home/$USER/temp/eng.traineddata

-

Specify option -e if you would like to extract individual components -from a combined traineddata file. For example, to extract language config -file and the unicharset from tessdata/eng.traineddata run:

-
-
-
combine_tessdata -e tessdata/eng.traineddata \
-  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
-
-

The desired config file and unicharset will be written to -/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset

-

Specify option -o to overwrite individual components of the given -[lang].traineddata file. For example, to overwrite language config -and unichar ambiguities files in tessdata/eng.traineddata use:

-
-
-
combine_tessdata -o tessdata/eng.traineddata \
-  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs
-
-

As a result, tessdata/eng.traineddata will contain the new language config -and unichar ambigs, plus all the original DAWGs, classifier templates, etc.

-

Note: the file names of the files to extract to and to overwrite from should -have the appropriate file suffixes (extensions) indicating their tessdata -component type (.unicharset for the unicharset, .unicharambigs for unichar -ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.

-

Specify option -u to unpack all the components to the specified path:

-
-
-
combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.
-
-

This will create /home/$USER/temp/eng.* files with individual tessdata -components from tessdata/eng.traineddata.

-
-
-
-

OPTIONS

-
-

-e .traineddata FILE…: - Extracts the specified components from the .traineddata file

-

-o .traineddata FILE…: - Overwrites the specified components of the .traineddata file - with those provided on the comand line.

-

-u .traineddata PATHPREFIX - Unpacks the .traineddata using the provided prefix.

-
-
-
-

CAVEATS

-
-

Prefix refers to the full file prefix, including period (.)

-
-
-
-

COMPONENTS

-
-

The components in a Tesseract lang.traineddata file as of -Tesseract 3.02 are briefly described below; For more information on -many of these files, see -https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract

-
-
-lang.config -
-
-

- (Optional) Language-specific overrides to default config variables. -

-
-
-lang.unicharset -
-
-

- (Required) The list of symbols that Tesseract recognizes, with properties. - See unicharset(5). -

-
-
-lang.unicharambigs -
-
-

- (Optional) This file contains information on pairs of recognized symbols - which are often confused. For example, rn and m. -

-
-
-lang.inttemp -
-
-

- (Required) Character shape templates for each unichar. Produced by - mftraining(1). -

-
-
-lang.pffmtable -
-
-

- (Required) The number of features expected for each unichar. - Produced by mftraining(1) from .tr files. -

-
-
-lang.normproto -
-
-

- (Required) Character normalization prototypes generated by cntraining(1) - from .tr files. -

-
-
-lang.punc-dawg -
-
-

- (Optional) A dawg made from punctuation patterns found around words. - The "word" part is replaced by a single space. -

-
-
-lang.word-dawg -
-
-

- (Optional) A dawg made from dictionary words from the language. -

-
-
-lang.number-dawg -
-
-

- (Optional) A dawg made from tokens which originally contained digits. - Each digit is replaced by a space character. -

-
-
-lang.freq-dawg -
-
-

- (Optional) A dawg made from the most frequent words which would have - gone into word-dawg. -

-
-
-lang.fixed-length-dawgs -
-
-

- (Optional) Several dawgs of different fixed lengths — useful for - languages like Chinese. -

-
-
-lang.cube-unicharset -
-
-

- (Optional) A unicharset for cube, if cube was trained on a different set - of symbols. -

-
-
-lang.cube-word-dawg -
-
-

- (Optional) A word dawg for cube’s alternate unicharset. Not needed if Cube - was trained with Tesseract’s unicharset. -

-
-
-lang.shapetable -
-
-

- (Optional) When present, a shapetable is an extra layer between the character - classifier and the word recognizer that allows the character classifier to - return a collection of unichar ids and fonts instead of a single unichar-id - and font. -

-
-
-lang.bigram-dawg -
-
-

- (Optional) A dawg of word bigrams where the words are separated by a space - and each digit is replaced by a ?. -

-
-
-lang.unambig-dawg -
-
-

- (Optional) TODO: Describe. -

-
-
-lang.params-training-model -
-
-

- (Optional) TODO: Describe. -

-
-
-
-
-
-

HISTORY

-
-

combine_tessdata(1) first appeared in version 3.00 of Tesseract

-
-
-
-

SEE ALSO

-
-

tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5), -unicharambigs(5)

-
-
-
-

COPYING

-
-

Copyright (C) 2009, Google Inc. -Licensed under the Apache License, Version 2.0

-
-
-
-

AUTHOR

-
-

The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present).

-
-
-
-

- - - + + + + + +COMBINE_TESSDATA(1) + + + + + +
+
+

SYNOPSIS

+
+

combine_tessdata [OPTION] FILE

+
+
+
+

DESCRIPTION

+
+

combine_tessdata(1) is the main program to combine/extract/overwrite +tessdata components in [lang].traineddata files.

+

To combine all the individual tessdata components (unicharset, DAWGs, +classifier templates, ambiguities, language configs) located at, say, +/home/$USER/temp/eng.* run:

+
+
+
combine_tessdata /home/$USER/temp/eng.
+
+

The result will be a combined tessdata file /home/$USER/temp/eng.traineddata

+

Specify option -e if you would like to extract individual components +from a combined traineddata file. For example, to extract language config +file and the unicharset from tessdata/eng.traineddata run:

+
+
+
combine_tessdata -e tessdata/eng.traineddata \
+  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset
+
+

The desired config file and unicharset will be written to +/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset

+

Specify option -o to overwrite individual components of the given +[lang].traineddata file. For example, to overwrite language config +and unichar ambiguities files in tessdata/eng.traineddata use:

+
+
+
combine_tessdata -o tessdata/eng.traineddata \
+  /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs
+
+

As a result, tessdata/eng.traineddata will contain the new language config +and unichar ambigs, plus all the original DAWGs, classifier templates, etc.

+

Note: the file names of the files to extract to and to overwrite from should +have the appropriate file suffixes (extensions) indicating their tessdata +component type (.unicharset for the unicharset, .unicharambigs for unichar +ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h.

+

Specify option -u to unpack all the components to the specified path:

+
+
+
combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng.
+
+

This will create /home/$USER/temp/eng.* files with individual tessdata +components from tessdata/eng.traineddata.

+
+
+
+

OPTIONS

+
+

-e .traineddata FILE…: + Extracts the specified components from the .traineddata file

+

-o .traineddata FILE…: + Overwrites the specified components of the .traineddata file + with those provided on the command line.

+

-u .traineddata PATHPREFIX + Unpacks the .traineddata using the provided prefix.

+
+
+
+

CAVEATS

+
+

Prefix refers to the full file prefix, including period (.)

+
+
+
+

COMPONENTS

+
+

The components in a Tesseract lang.traineddata file as of +Tesseract 3.02 are briefly described below. For more information on +many of these files, see +https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract

+
+
+lang.config +
+
+

+ (Optional) Language-specific overrides to default config variables. +

+
+
+lang.unicharset +
+
+

+ (Required) The list of symbols that Tesseract recognizes, with properties. + See unicharset(5). +

+
+
+lang.unicharambigs +
+
+

+ (Optional) This file contains information on pairs of recognized symbols + which are often confused. For example, rn and m. +

+
+
+lang.inttemp +
+
+

+ (Required) Character shape templates for each unichar. Produced by + mftraining(1). +

+
+
+lang.pffmtable +
+
+

+ (Required) The number of features expected for each unichar. + Produced by mftraining(1) from .tr files. +

+
+
+lang.normproto +
+
+

+ (Required) Character normalization prototypes generated by cntraining(1) + from .tr files. +

+
+
+lang.punc-dawg +
+
+

+ (Optional) A dawg made from punctuation patterns found around words. + The "word" part is replaced by a single space. +

+
+
+lang.word-dawg +
+
+

+ (Optional) A dawg made from dictionary words from the language. +

+
+
+lang.number-dawg +
+
+

+ (Optional) A dawg made from tokens which originally contained digits. + Each digit is replaced by a space character. +

+
+
+lang.freq-dawg +
+
+

+ (Optional) A dawg made from the most frequent words which would have + gone into word-dawg. +

+
+
+lang.fixed-length-dawgs +
+
+

+ (Optional) Several dawgs of different fixed lengths — useful for + languages like Chinese. +

+
+
+lang.cube-unicharset +
+
+

+ (Optional) A unicharset for cube, if cube was trained on a different set + of symbols. +

+
+
+lang.cube-word-dawg +
+
+

+ (Optional) A word dawg for cube’s alternate unicharset. Not needed if Cube + was trained with Tesseract’s unicharset. +

+
+
+lang.shapetable +
+
+

+ (Optional) When present, a shapetable is an extra layer between the character + classifier and the word recognizer that allows the character classifier to + return a collection of unichar ids and fonts instead of a single unichar-id + and font. +

+
+
+lang.bigram-dawg +
+
+

+ (Optional) A dawg of word bigrams where the words are separated by a space + and each digit is replaced by a ?. +

+
+
+lang.unambig-dawg +
+
+

+ (Optional) TODO: Describe. +

+
+
+lang.params-training-model +
+
+

+ (Optional) TODO: Describe. +

+
+
+
+
+
+

HISTORY

+
+

combine_tessdata(1) first appeared in version 3.00 of Tesseract

+
+
+
+

SEE ALSO

+
+

tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5), +unicharambigs(5)

+
+
+
+

COPYING

+
+

Copyright (C) 2009, Google Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/doc/combine_tessdata.1.xml b/doc/combine_tessdata.1.xml index 1a43995fb5..693e1343b5 100644 --- a/doc/combine_tessdata.1.xml +++ b/doc/combine_tessdata.1.xml @@ -1,281 +1,281 @@ - - - - - - - COMBINE_TESSDATA(1) - - -combine_tessdata -1 -  -  - - - combine_tessdata - combine/extract/overwrite Tesseract data - - -combine_tessdata [OPTION] FILE - - -DESCRIPTION -combine_tessdata(1) is the main program to combine/extract/overwrite -tessdata components in [lang].traineddata files. -To combine all the individual tessdata components (unicharset, DAWGs, -classifier templates, ambiguities, language configs) located at, say, -/home/$USER/temp/eng.* run: -combine_tessdata /home/$USER/temp/eng. -The result will be a combined tessdata file /home/$USER/temp/eng.traineddata -Specify option -e if you would like to extract individual components -from a combined traineddata file. For example, to extract language config -file and the unicharset from tessdata/eng.traineddata run: -combine_tessdata -e tessdata/eng.traineddata \ - /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset -The desired config file and unicharset will be written to -/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset -Specify option -o to overwrite individual components of the given -[lang].traineddata file. For example, to overwrite language config -and unichar ambiguities files in tessdata/eng.traineddata use: -combine_tessdata -o tessdata/eng.traineddata \ - /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs -As a result, tessdata/eng.traineddata will contain the new language config -and unichar ambigs, plus all the original DAWGs, classifier templates, etc. -Note: the file names of the files to extract to and to overwrite from should -have the appropriate file suffixes (extensions) indicating their tessdata -component type (.unicharset for the unicharset, .unicharambigs for unichar -ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h. 
-Specify option -u to unpack all the components to the specified path: -combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng. -This will create /home/$USER/temp/eng.* files with individual tessdata -components from tessdata/eng.traineddata. - - -OPTIONS --e .traineddata FILE…: - Extracts the specified components from the .traineddata file --o .traineddata FILE…: - Overwrites the specified components of the .traineddata file - with those provided on the comand line. --u .traineddata PATHPREFIX - Unpacks the .traineddata using the provided prefix. - - -CAVEATS -Prefix refers to the full file prefix, including period (.) - - -COMPONENTS -The components in a Tesseract lang.traineddata file as of -Tesseract 3.02 are briefly described below; For more information on -many of these files, see -https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract - - - -lang.config - - - - (Optional) Language-specific overrides to default config variables. - - - - - -lang.unicharset - - - - (Required) The list of symbols that Tesseract recognizes, with properties. - See unicharset(5). - - - - - -lang.unicharambigs - - - - (Optional) This file contains information on pairs of recognized symbols - which are often confused. For example, rn and m. - - - - - -lang.inttemp - - - - (Required) Character shape templates for each unichar. Produced by - mftraining(1). - - - - - -lang.pffmtable - - - - (Required) The number of features expected for each unichar. - Produced by mftraining(1) from .tr files. - - - - - -lang.normproto - - - - (Required) Character normalization prototypes generated by cntraining(1) - from .tr files. - - - - - -lang.punc-dawg - - - - (Optional) A dawg made from punctuation patterns found around words. - The "word" part is replaced by a single space. - - - - - -lang.word-dawg - - - - (Optional) A dawg made from dictionary words from the language. - - - - - -lang.number-dawg - - - - (Optional) A dawg made from tokens which originally contained digits. 
- Each digit is replaced by a space character. - - - - - -lang.freq-dawg - - - - (Optional) A dawg made from the most frequent words which would have - gone into word-dawg. - - - - - -lang.fixed-length-dawgs - - - - (Optional) Several dawgs of different fixed lengths — useful for - languages like Chinese. - - - - - -lang.cube-unicharset - - - - (Optional) A unicharset for cube, if cube was trained on a different set - of symbols. - - - - - -lang.cube-word-dawg - - - - (Optional) A word dawg for cube’s alternate unicharset. Not needed if Cube - was trained with Tesseract’s unicharset. - - - - - -lang.shapetable - - - - (Optional) When present, a shapetable is an extra layer between the character - classifier and the word recognizer that allows the character classifier to - return a collection of unichar ids and fonts instead of a single unichar-id - and font. - - - - - -lang.bigram-dawg - - - - (Optional) A dawg of word bigrams where the words are separated by a space - and each digit is replaced by a ?. - - - - - -lang.unambig-dawg - - - - (Optional) TODO: Describe. - - - - - -lang.params-training-model - - - - (Optional) TODO: Describe. - - - - - - -HISTORY -combine_tessdata(1) first appeared in version 3.00 of Tesseract - - -SEE ALSO -tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5), -unicharambigs(5) - - -COPYING -Copyright (C) 2009, Google Inc. -Licensed under the Apache License, Version 2.0 - - -AUTHOR -The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present). - - + + + + + + + COMBINE_TESSDATA(1) + + +combine_tessdata +1 +  +  + + + combine_tessdata + combine/extract/overwrite Tesseract data + + +combine_tessdata [OPTION] FILE + + +DESCRIPTION +combine_tessdata(1) is the main program to combine/extract/overwrite +tessdata components in [lang].traineddata files. 
+To combine all the individual tessdata components (unicharset, DAWGs, +classifier templates, ambiguities, language configs) located at, say, +/home/$USER/temp/eng.* run: +combine_tessdata /home/$USER/temp/eng. +The result will be a combined tessdata file /home/$USER/temp/eng.traineddata +Specify option -e if you would like to extract individual components +from a combined traineddata file. For example, to extract language config +file and the unicharset from tessdata/eng.traineddata run: +combine_tessdata -e tessdata/eng.traineddata \ + /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset +The desired config file and unicharset will be written to +/home/$USER/temp/eng.config /home/$USER/temp/eng.unicharset +Specify option -o to overwrite individual components of the given +[lang].traineddata file. For example, to overwrite language config +and unichar ambiguities files in tessdata/eng.traineddata use: +combine_tessdata -o tessdata/eng.traineddata \ + /home/$USER/temp/eng.config /home/$USER/temp/eng.unicharambigs +As a result, tessdata/eng.traineddata will contain the new language config +and unichar ambigs, plus all the original DAWGs, classifier templates, etc. +Note: the file names of the files to extract to and to overwrite from should +have the appropriate file suffixes (extensions) indicating their tessdata +component type (.unicharset for the unicharset, .unicharambigs for unichar +ambigs, etc). See k*FileSuffix variable in ccutil/tessdatamanager.h. +Specify option -u to unpack all the components to the specified path: +combine_tessdata -u tessdata/eng.traineddata /home/$USER/temp/eng. +This will create /home/$USER/temp/eng.* files with individual tessdata +components from tessdata/eng.traineddata. + + +OPTIONS +-e .traineddata FILE…: + Extracts the specified components from the .traineddata file +-o .traineddata FILE…: + Overwrites the specified components of the .traineddata file + with those provided on the comand line. 
+-u .traineddata PATHPREFIX + Unpacks the .traineddata using the provided prefix. + + +CAVEATS +Prefix refers to the full file prefix, including period (.) + + +COMPONENTS +The components in a Tesseract lang.traineddata file as of +Tesseract 3.02 are briefly described below; For more information on +many of these files, see +https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract + + + +lang.config + + + + (Optional) Language-specific overrides to default config variables. + + + + + +lang.unicharset + + + + (Required) The list of symbols that Tesseract recognizes, with properties. + See unicharset(5). + + + + + +lang.unicharambigs + + + + (Optional) This file contains information on pairs of recognized symbols + which are often confused. For example, rn and m. + + + + + +lang.inttemp + + + + (Required) Character shape templates for each unichar. Produced by + mftraining(1). + + + + + +lang.pffmtable + + + + (Required) The number of features expected for each unichar. + Produced by mftraining(1) from .tr files. + + + + + +lang.normproto + + + + (Required) Character normalization prototypes generated by cntraining(1) + from .tr files. + + + + + +lang.punc-dawg + + + + (Optional) A dawg made from punctuation patterns found around words. + The "word" part is replaced by a single space. + + + + + +lang.word-dawg + + + + (Optional) A dawg made from dictionary words from the language. + + + + + +lang.number-dawg + + + + (Optional) A dawg made from tokens which originally contained digits. + Each digit is replaced by a space character. + + + + + +lang.freq-dawg + + + + (Optional) A dawg made from the most frequent words which would have + gone into word-dawg. + + + + + +lang.fixed-length-dawgs + + + + (Optional) Several dawgs of different fixed lengths — useful for + languages like Chinese. + + + + + +lang.cube-unicharset + + + + (Optional) A unicharset for cube, if cube was trained on a different set + of symbols. 
+ + + + + +lang.cube-word-dawg + + + + (Optional) A word dawg for cube’s alternate unicharset. Not needed if Cube + was trained with Tesseract’s unicharset. + + + + + +lang.shapetable + + + + (Optional) When present, a shapetable is an extra layer between the character + classifier and the word recognizer that allows the character classifier to + return a collection of unichar ids and fonts instead of a single unichar-id + and font. + + + + + +lang.bigram-dawg + + + + (Optional) A dawg of word bigrams where the words are separated by a space + and each digit is replaced by a ?. + + + + + +lang.unambig-dawg + + + + (Optional) TODO: Describe. + + + + + +lang.params-training-model + + + + (Optional) TODO: Describe. + + + + + + +HISTORY +combine_tessdata(1) first appeared in version 3.00 of Tesseract + + +SEE ALSO +tesseract(1), wordlist2dawg(1), cntraining(1), mftraining(1), unicharset(5), +unicharambigs(5) + + +COPYING +Copyright (C) 2009, Google Inc. +Licensed under the Apache License, Version 2.0 + + +AUTHOR +The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present). + + diff --git a/doc/dawg2wordlist.1.html b/doc/dawg2wordlist.1.html index b700fe186d..0b2645dfb7 100644 --- a/doc/dawg2wordlist.1.html +++ b/doc/dawg2wordlist.1.html @@ -1,802 +1,802 @@ - - - - - -DAWG2WORDLIST(1) - - - - - -
-
-

SYNOPSIS

-
-

dawg2wordlist UNICHARSET DAWG WORDLIST

-
-
-
-

DESCRIPTION

-
-

dawg2wordlist(1) converts a Tesseract Directed Acyclic Word -Graph (DAWG) to a list of words using a unicharset as key.

-
-
-
-

OPTIONS

-
-

UNICHARSET - The unicharset of the language. This is the unicharset - generated by mftraining(1).

-

DAWG - The input DAWG, created by wordlist2dawg(1)

-

WORDLIST - Plain text (output) file in UTF-8, one word per line

-
-
-
-

SEE ALSO

-
-

tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), -combine_tessdata(1)

- -
-
-
-

COPYING

-
-

Copyright (C) 2012 Google, Inc. -Licensed under the Apache License, Version 2.0

-
-
-
-

AUTHOR

-
-

The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present).

-
-
-
-

- - - + + + + + +DAWG2WORDLIST(1) + + + + + +
+
+

SYNOPSIS

+
+

dawg2wordlist UNICHARSET DAWG WORDLIST

+
+
+
+

DESCRIPTION

+
+

dawg2wordlist(1) converts a Tesseract Directed Acyclic Word +Graph (DAWG) to a list of words using a unicharset as key.

+
+
+
+

OPTIONS

+
+

UNICHARSET + The unicharset of the language. This is the unicharset + generated by mftraining(1).

+

DAWG + The input DAWG, created by wordlist2dawg(1)

+

WORDLIST + Plain text (output) file in UTF-8, one word per line

+
+
+
+

SEE ALSO

+
+

tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), +combine_tessdata(1)

+ +
+
+
+

COPYING

+
+

Copyright (C) 2012 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/doc/dawg2wordlist.1.xml b/doc/dawg2wordlist.1.xml index c73113191c..ee960ad9fc 100644 --- a/doc/dawg2wordlist.1.xml +++ b/doc/dawg2wordlist.1.xml @@ -1,53 +1,53 @@ - - - - - - - DAWG2WORDLIST(1) - - -dawg2wordlist -1 -  -  - - - dawg2wordlist - convert a Tesseract DAWG to a wordlist - - -dawg2wordlist UNICHARSET DAWG WORDLIST - - -DESCRIPTION -dawg2wordlist(1) converts a Tesseract Directed Acyclic Word -Graph (DAWG) to a list of words using a unicharset as key. - - -OPTIONS -UNICHARSET - The unicharset of the language. This is the unicharset - generated by mftraining(1). -DAWG - The input DAWG, created by wordlist2dawg(1) -WORDLIST - Plain text (output) file in UTF-8, one word per line - - -SEE ALSO -tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), -combine_tessdata(1) -https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract - - -COPYING -Copyright (C) 2012 Google, Inc. -Licensed under the Apache License, Version 2.0 - - -AUTHOR -The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present). - - + + + + + + + DAWG2WORDLIST(1) + + +dawg2wordlist +1 +  +  + + + dawg2wordlist + convert a Tesseract DAWG to a wordlist + + +dawg2wordlist UNICHARSET DAWG WORDLIST + + +DESCRIPTION +dawg2wordlist(1) converts a Tesseract Directed Acyclic Word +Graph (DAWG) to a list of words using a unicharset as key. + + +OPTIONS +UNICHARSET + The unicharset of the language. This is the unicharset + generated by mftraining(1). +DAWG + The input DAWG, created by wordlist2dawg(1) +WORDLIST + Plain text (output) file in UTF-8, one word per line + + +SEE ALSO +tesseract(1), mftraining(1), wordlist2dawg(1), unicharset(5), +combine_tessdata(1) +https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract + + +COPYING +Copyright (C) 2012 Google, Inc. 
+Licensed under the Apache License, Version 2.0 + + +AUTHOR +The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present). + + diff --git a/doc/mftraining.1.asc b/doc/mftraining.1.asc index 85e1263ade..43fe533a16 100644 --- a/doc/mftraining.1.asc +++ b/doc/mftraining.1.asc @@ -24,12 +24,12 @@ OPTIONS -F 'font_properties_file':: (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1: - + *font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur* -X 'xheights_file':: (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] - + *font_name* *xheight* -D 'dir':: diff --git a/doc/mftraining.1.html b/doc/mftraining.1.html index 4abdfd6a6c..41a3804457 100644 --- a/doc/mftraining.1.html +++ b/doc/mftraining.1.html @@ -1,847 +1,847 @@ - - - - - -MFTRAINING(1) - - - - - -
-
-

SYNOPSIS

-
-

mftraining -U unicharset -O lang.unicharset FILE

-
-
-
-

DESCRIPTION

-
-

mftraining takes a list of .tr files, from which it generates the -files inttemp (the shape prototypes), shapetable, and pffmtable -(the number of expected features for each character). (A fourth file -called Microfeat is also written by this program, but it is not used.)

-
-
-
-

OPTIONS

-
-
-
--U FILE -
-
-

- (Input) The unicharset generated by unicharset_extractor(1) -

-
-
--F font_properties_file -
-
-

- (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1: -

-
-
-
*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*
-
-
-
--X xheights_file -
-
-

- (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] -

-
-
-
*font_name* *xheight*
-
-
-
--D dir -
-
-

- Directory to write output files to. -

-
-
--O FILE -
-
-

- (Output) The output unicharset that will be given to combine_tessdata(1) -

-
-
-
-
-
-

SEE ALSO

-
-

tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), -shapeclustering(1), unicharset(5)

- -
-
-
-

COPYING

-
-

Copyright (C) Hewlett-Packard Company, 1988 -Licensed under the Apache License, Version 2.0

-
-
-
-

AUTHOR

-
-

The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present).

-
-
-
-

- - - + + + + + +MFTRAINING(1) + + + + + +
+
+

SYNOPSIS

+
+

mftraining -U unicharset -O lang.unicharset FILE

+
+
+
+

DESCRIPTION

+
+

mftraining takes a list of .tr files, from which it generates the +files inttemp (the shape prototypes), shapetable, and pffmtable +(the number of expected features for each character). (A fourth file +called Microfeat is also written by this program, but it is not used.)

+
+
+
+

OPTIONS

+
+
+
+-U FILE +
+
+

+ (Input) The unicharset generated by unicharset_extractor(1) +

+
+
+-F font_properties_file +
+
+

+ (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1: +

+
+
+
*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur*
+
+
+
+-X xheights_file +
+
+

+ (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] +

+
+
+
*font_name* *xheight*
+
+
+
+-D dir +
+
+

+ Directory to write output files to. +

+
+
+-O FILE +
+
+

+ (Output) The output unicharset that will be given to combine_tessdata(1) +

+
+
+
+
+
+

SEE ALSO

+
+

tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), +shapeclustering(1), unicharset(5)

+ +
+
+
+

COPYING

+
+

Copyright (C) Hewlett-Packard Company, 1988 +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/doc/mftraining.1.xml b/doc/mftraining.1.xml index 239178a5c1..10b3c6d2e5 100644 --- a/doc/mftraining.1.xml +++ b/doc/mftraining.1.xml @@ -1,102 +1,102 @@ - - - - - - - MFTRAINING(1) - - -mftraining -1 -  -  - - - mftraining - feature training for Tesseract - - -mftraining -U unicharset -O lang.unicharset FILE - - -DESCRIPTION -mftraining takes a list of .tr files, from which it generates the -files inttemp (the shape prototypes), shapetable, and pffmtable -(the number of expected features for each character). (A fourth file -called Microfeat is also written by this program, but it is not used.) - - -OPTIONS - - - --U FILE - - - - (Input) The unicharset generated by unicharset_extractor(1) - - - - - --F font_properties_file - - - - (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1: - -*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur* - - - - --X xheights_file - - - - (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] - -*font_name* *xheight* - - - - --D dir - - - - Directory to write output files to. - - - - - --O FILE - - - - (Output) The output unicharset that will be given to combine_tessdata(1) - - - - - - -SEE ALSO -tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), -shapeclustering(1), unicharset(5) -https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract - - -COPYING -Copyright (C) Hewlett-Packard Company, 1988 -Licensed under the Apache License, Version 2.0 - - -AUTHOR -The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present). 
- - + + + + + + + MFTRAINING(1) + + +mftraining +1 +  +  + + + mftraining + feature training for Tesseract + + +mftraining -U unicharset -O lang.unicharset FILE + + +DESCRIPTION +mftraining takes a list of .tr files, from which it generates the +files inttemp (the shape prototypes), shapetable, and pffmtable +(the number of expected features for each character). (A fourth file +called Microfeat is also written by this program, but it is not used.) + + +OPTIONS + + + +-U FILE + + + + (Input) The unicharset generated by unicharset_extractor(1) + + + + + +-F font_properties_file + + + + (Input) font properties file, each line is of the following form, where each field other than the font name is 0 or 1: + +*font_name* *italic* *bold* *fixed_pitch* *serif* *fraktur* + + + + +-X xheights_file + + + + (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] + +*font_name* *xheight* + + + + +-D dir + + + + Directory to write output files to. + + + + + +-O FILE + + + + (Output) The output unicharset that will be given to combine_tessdata(1) + + + + + + +SEE ALSO +tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), +shapeclustering(1), unicharset(5) +https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract + + +COPYING +Copyright (C) Hewlett-Packard Company, 1988 +Licensed under the Apache License, Version 2.0 + + +AUTHOR +The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present). 
+ + diff --git a/doc/shapeclustering.1.asc b/doc/shapeclustering.1.asc index 81ca0dbc09..0a1bfb035b 100644 --- a/doc/shapeclustering.1.asc +++ b/doc/shapeclustering.1.asc @@ -35,7 +35,7 @@ OPTIONS -X 'xheights_file':: (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] - + 'font_name' 'xheight' -O 'FILE':: diff --git a/doc/shapeclustering.1.html b/doc/shapeclustering.1.html index 845d49a815..5fca944fc8 100644 --- a/doc/shapeclustering.1.html +++ b/doc/shapeclustering.1.html @@ -1,850 +1,850 @@ - - - - - -SHAPECLUSTERING(1) - - - - - -
-
-

SYNOPSIS

-
-

shapeclustering -D output_dir - -U unicharset -O mfunicharset - -F font_props -X xheights - FILE

-
-
-
-

DESCRIPTION

-
-

shapeclustering(1) takes extracted feature .tr files (generated by -tesseract(1) run in a special mode from box files) and produces a -file shapetable and an enhanced unicharset. This program is still -experimental, and is not required (yet) for training Tesseract.

-
-
-
-

OPTIONS

-
-
-
--U FILE -
-
-

- The unicharset generated by unicharset_extractor(1). -

-
-
--D dir -
-
-

- Directory to write output files to. -

-
-
--F font_properties_file -
-
-

- (Input) font properties file, where each line is of the following form, where each field other than the font name is 0 or 1: -

-
-
-
'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'
-
-
-
--X xheights_file -
-
-

- (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] -

-
-
-
'font_name' 'xheight'
-
-
-
--O FILE -
-
-

- The output unicharset that will be given to combine_tessdata(1). -

-
-
-
-
-
-

SEE ALSO

-
-

tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), -unicharset(5)

- -
-
-
-

COPYING

-
-

Copyright (C) Google, 2011 -Licensed under the Apache License, Version 2.0

-
-
-
-

AUTHOR

-
-

The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present).

-
-
-
-

- - - + + + + + +SHAPECLUSTERING(1) + + + + + +
+
+

SYNOPSIS

+
+

shapeclustering -D output_dir + -U unicharset -O mfunicharset + -F font_props -X xheights + FILE

+
+
+
+

DESCRIPTION

+
+

shapeclustering(1) takes extracted feature .tr files (generated by +tesseract(1) run in a special mode from box files) and produces a +file shapetable and an enhanced unicharset. This program is still +experimental, and is not required (yet) for training Tesseract.

+
+
+
+

OPTIONS

+
+
+
+-U FILE +
+
+

+ The unicharset generated by unicharset_extractor(1). +

+
+
+-D dir +
+
+

+ Directory to write output files to. +

+
+
+-F font_properties_file +
+
+

+ (Input) font properties file, where each line is of the following form, where each field other than the font name is 0 or 1: +

+
+
+
'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur'
+
+
+
+-X xheights_file +
+
+

+ (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] +

+
+
+
'font_name' 'xheight'
+
+
+
+-O FILE +
+
+

+ The output unicharset that will be given to combine_tessdata(1). +

+
+
+
+
+
+

SEE ALSO

+
+

tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), +unicharset(5)

+ +
+
+
+

COPYING

+
+

Copyright (C) Google, 2011 +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/doc/shapeclustering.1.xml b/doc/shapeclustering.1.xml index d02bcf8db9..933789ad3c 100644 --- a/doc/shapeclustering.1.xml +++ b/doc/shapeclustering.1.xml @@ -1,105 +1,105 @@ - - - - - - - SHAPECLUSTERING(1) - - -shapeclustering -1 -  -  - - - shapeclustering - shape clustering training for Tesseract - - -shapeclustering -D output_dir - -U unicharset -O mfunicharset - -F font_props -X xheights - FILE - - -DESCRIPTION -shapeclustering(1) takes extracted feature .tr files (generated by -tesseract(1) run in a special mode from box files) and produces a -file shapetable and an enhanced unicharset. This program is still -experimental, and is not required (yet) for training Tesseract. - - -OPTIONS - - - --U FILE - - - - The unicharset generated by unicharset_extractor(1). - - - - - --D dir - - - - Directory to write output files to. - - - - - --F font_properties_file - - - - (Input) font properties file, where each line is of the following form, where each field other than the font name is 0 or 1: - -'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur' - - - - --X xheights_file - - - - (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] - -'font_name' 'xheight' - - - - --O FILE - - - - The output unicharset that will be given to combine_tessdata(1). - - - - - - -SEE ALSO -tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), -unicharset(5) -https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract - - -COPYING -Copyright (C) Google, 2011 -Licensed under the Apache License, Version 2.0 - - -AUTHOR -The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present). 
- - + + + + + + + SHAPECLUSTERING(1) + + +shapeclustering +1 +  +  + + + shapeclustering + shape clustering training for Tesseract + + +shapeclustering -D output_dir + -U unicharset -O mfunicharset + -F font_props -X xheights + FILE + + +DESCRIPTION +shapeclustering(1) takes extracted feature .tr files (generated by +tesseract(1) run in a special mode from box files) and produces a +file shapetable and an enhanced unicharset. This program is still +experimental, and is not required (yet) for training Tesseract. + + +OPTIONS + + + +-U FILE + + + + The unicharset generated by unicharset_extractor(1). + + + + + +-D dir + + + + Directory to write output files to. + + + + + +-F font_properties_file + + + + (Input) font properties file, where each line is of the following form, where each field other than the font name is 0 or 1: + +'font_name' 'italic' 'bold' 'fixed_pitch' 'serif' 'fraktur' + + + + +-X xheights_file + + + + (Input) x heights file, each line is of the following form, where xheight is calculated as the pixel x height of a character drawn at 32pt on 300 dpi. [ That is, if base x height + ascenders + descenders = 133, how much is x height? ] + +'font_name' 'xheight' + + + + +-O FILE + + + + The output unicharset that will be given to combine_tessdata(1). + + + + + + +SEE ALSO +tesseract(1), cntraining(1), unicharset_extractor(1), combine_tessdata(1), +unicharset(5) +https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract + + +COPYING +Copyright (C) Google, 2011 +Licensed under the Apache License, Version 2.0 + + +AUTHOR +The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present). + + diff --git a/doc/tesseract.1.asc b/doc/tesseract.1.asc index 237299fe51..312aae07f6 100644 --- a/doc/tesseract.1.asc +++ b/doc/tesseract.1.asc @@ -67,7 +67,7 @@ OPTIONS 6 = Assume a single uniform block of text. 7 = Treat the image as a single text line. 8 = Treat the image as a single word. 
- 9 = Treat the image as a single word in a circle. + 9 = Treat the image as a single word in a circle. 10 = Treat the image as a single character. 'configfile':: @@ -264,10 +264,10 @@ on read_pattern_list(). HISTORY ------- -The engine was developed at Hewlett Packard Laboratories Bristol and at -Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more -changes made in 1996 to port to Windows, and some C\+\+izing in 1998. A -lot of the code was written in C, and then some more was written in C\+\+. +The engine was developed at Hewlett Packard Laboratories Bristol and at +Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more +changes made in 1996 to port to Windows, and some C\+\+izing in 1998. A +lot of the code was written in C, and then some more was written in C\+\+. The C\+\+ code makes heavy use of a list system using macros. This predates stl, was portable before stl, and is more efficient than stl lists, but has the big negative that if you do get a segmentation violation, it is hard to @@ -276,18 +276,18 @@ debug. Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability to train Tesseract. -Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy. +Tesseract was included in UNLV's Fourth Annual Test of OCR Accuracy. See . With Tesseract 2.00, -scripts are now included to allow anyone to reproduce some of these tests. -See for more +scripts are now included to allow anyone to reproduce some of these tests. +See for more details. -Tesseract 3.00 adds a number of new languages, including Chinese, Japanese, -and Korean. It also introduces a new, single-file based system of managing +Tesseract 3.00 adds a number of new languages, including Chinese, Japanese, +and Korean. It also introduces a new, single-file based system of managing language data. -Tesseract 3.02 adds BiDirectional text support, the ability to recognize -multiple languages in a single image, and improved layout analysis. 
+Tesseract 3.02 adds BiDirectional text support, the ability to recognize +multiple languages in a single image, and improved layout analysis. For further details, see the file ReleaseNotes included with the distribution. diff --git a/doc/tesseract.1.html b/doc/tesseract.1.html index 5e37d31170..d0addae65b 100644 --- a/doc/tesseract.1.html +++ b/doc/tesseract.1.html @@ -1,1163 +1,1163 @@ - - - - - -TESSERACT(1) - - - - - -
-
-

SYNOPSIS

-
-

tesseract imagename|stdin outputbase|stdout [options…] [configfile…]

-
-
-
-

DESCRIPTION

-
-

tesseract(1) is a commercial quality OCR engine originally developed at HP -between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by -UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed -at Google since then.

-
-
-
-

IN/OUT ARGUMENTS

-
-
-
-imagename -
-
-

- The name of the input image. Most image file formats (anything - readable by Leptonica) are supported. -

-
-
-stdin -
-
-

- Instruction to read data from standard input -

-
-
-outputbase -
-
-

- The basename of the output file (to which the appropriate extension - will be appended). By default the output will be named outbase.txt. -

-
-
-stdout -
-
-

- Instruction to sent output data to standard output -

-
-
-
-
-
-

OPTIONS

-
-
-
---tessdata-dir /path -
-
-

- Specify the location of tessdata path -

-
-
---user-words /path/to/file -
-
-

- Specify the location of user words file -

-
-
---user-patterns /path/to/file specify -
-
-

- The location of user patterns file -

-
-
--c configvar=value -
-
-

- Set value for control parameter. Multiple -c arguments are allowed. -

-
-
--l lang -
-
-

- The language to use. If none is specified, English is assumed. - Multiple languages may be specified, separated by plus characters. - Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES) -

-
-
---psm N -
-
-

- Set Tesseract to only run a subset of layout analysis and assume - a certain form of image. The options for N are: -

-
-
-
0 = Orientation and script detection (OSD) only.
-1 = Automatic page segmentation with OSD.
-2 = Automatic page segmentation, but no OSD, or OCR.
-3 = Fully automatic page segmentation, but no OSD. (Default)
-4 = Assume a single column of text of variable sizes.
-5 = Assume a single uniform block of vertically aligned text.
-6 = Assume a single uniform block of text.
-7 = Treat the image as a single text line.
-8 = Treat the image as a single word.
-9 = Treat the image as a single word in a circle.
-10 = Treat the image as a single character.
-
-
-
-configfile -
-
-

- The name of a config to use. A config is a plaintext file which - contains a list of variables and their values, one per line, with a - space separating variable from value. Interesting config files - include:
-

-
    -
  • -

    -hocr - Output in hOCR format instead of as a text file. -

    -
  • -
  • -

    -pdf - Output in pdf instead of a text file. -

    -
  • -
-
-
-

Nota Bene: The options -l lang and --psm N must occur -before any configfile.

-
-
-
-

SINGLE OPTIONS

-
-
-
--v -
-
-

- Returns the current version of the tesseract(1) executable. -

-
-
---list-langs -
-
-

- list available languages for tesseract engine. Can be used with --tessdata-dir. -

-
-
---print-parameters -
-
-

- print tesseract parameters to the stdout. -

-
-
-
-
-
-

LANGUAGES

-
-

There are currently language packs available for the following languages -(in https://github.com/tesseract-ocr/tessdata):

-

afr (Afrikaans) -amh (Amharic) -ara (Arabic) -asm (Assamese) -aze (Azerbaijani) -aze_cyrl (Azerbaijani - Cyrilic) -bel (Belarusian) -ben (Bengali) -bod (Tibetan) -bos (Bosnian) -bul (Bulgarian) -cat (Catalan; Valencian) -ceb (Cebuano) -ces (Czech) -chi_sim (Chinese - Simplified) -chi_tra (Chinese - Traditional) -chr (Cherokee) -cym (Welsh) -dan (Danish) -dan_frak (Danish - Fraktur) -deu (German) -deu_frak (German - Fraktur) -dzo (Dzongkha) -ell (Greek, Modern (1453-)) -eng (English) -enm (English, Middle (1100-1500)) -epo (Esperanto) -equ (Math / equation detection module) -est (Estonian) -eus (Basque) -fas (Persian) -fin (Finnish) -fra (French) -frk (Frankish) -frm (French, Middle (ca.1400-1600)) -gle (Irish) -glg (Galician) -grc (Greek, Ancient (to 1453)) -guj (Gujarati) -hat (Haitian; Haitian Creole) -heb (Hebrew) -hin (Hindi) -hrv (Croatian) -hun (Hungarian) -iku (Inuktitut) -ind (Indonesian) -isl (Icelandic) -ita (Italian) -ita_old (Italian - Old) -jav (Javanese) -jpn (Japanese) -kan (Kannada) -kat (Georgian) -kat_old (Georgian - Old) -kaz (Kazakh) -khm (Central Khmer) -kir (Kirghiz; Kyrgyz) -kor (Korean) -kur (Kurdish) -lao (Lao) -lat (Latin) -lav (Latvian) -lit (Lithuanian) -mal (Malayalam) -mar (Marathi) -mkd (Macedonian) -mlt (Maltese) -msa (Malay) -mya (Burmese) -nep (Nepali) -nld (Dutch; Flemish) -nor (Norwegian) -ori (Oriya) -osd (Orientation and script detection module) -pan (Panjabi; Punjabi) -pol (Polish) -por (Portuguese) -pus (Pushto; Pashto) -ron (Romanian; Moldavian; Moldovan) -rus (Russian) -san (Sanskrit) -sin (Sinhala; Sinhalese) -slk (Slovak) -slk_frak (Slovak - Fraktur) -slv (Slovenian) -spa (Spanish; Castilian) -spa_old (Spanish; Castilian - Old) -sqi (Albanian) -srp (Serbian) -srp_latn (Serbian - Latin) -swa (Swahili) -swe (Swedish) -syr (Syriac) -tam (Tamil) -tel (Telugu) -tgk (Tajik) -tgl (Tagalog) -tha (Thai) -tir (Tigrinya) -tur (Turkish) -uig (Uighur; Uyghur) -ukr (Ukrainian) -urd (Urdu) -uzb (Uzbek) -uzb_cyrl (Uzbek - Cyrilic) -vie 
(Vietnamese) -yid (Yiddish)

-

To use a non-standard language pack named foo.traineddata, set the -TESSDATA_PREFIX environment variable so the file can be found at -TESSDATA_PREFIX/tessdata/foo.traineddata and give Tesseract the -argument -l foo.

-
-
-
-

CONFIG FILES AND AUGMENTING WITH USER DATA

-
-

Tesseract config files consist of lines with variable-value pairs (space -separated). The variables are documented as flags in the source code like -the following one in tesseractclass.h:

-

STRING_VAR_H(tessedit_char_blacklist, "", - "Blacklist of chars not to recognize");

-

These variables may enable or disable various features of the engine, and -may cause it to load (or not load) various data. For instance, let’s suppose -you want to OCR in English, but suppress the normal dictionary and load an -alternative word list and an alternative list of patterns — these two files -are the most commonly used extra data files.

-

If your language pack is in /path/to/eng.traineddata and the hocr config -is in /path/to/configs/hocr then create three new files:

-

/path/to/eng.user-words:

-
-
the
-quick
-brown
-fox
-jumped
-
-
-

/path/to/eng.user-patterns:

-
-
1-\d\d\d-GOOG-411
-www.\n\\\*.com
-
-
-

/path/to/configs/bazaar:

-
-
load_system_dawg     F
-load_freq_dawg       F
-user_words_suffix    user-words
-user_patterns_suffix user-patterns
-
-
-

Now, if you pass the word bazaar as a trailing command line parameter -to Tesseract, Tesseract will not bother loading the system dictionary nor -the dictionary of frequent words and will load and use the eng.user-words -and eng.user-patterns files you provided. The former is a simple word list, -one per line. The format of the latter is documented in dict/trie.h -on read_pattern_list().

-
-
-
-

HISTORY

-
-

The engine was developed at Hewlett Packard Laboratories Bristol and at -Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more -changes made in 1996 to port to Windows, and some C++izing in 1998. A -lot of the code was written in C, and then some more was written in C++. -The C\++ code makes heavy use of a list system using macros. This predates -stl, was portable before stl, and is more efficient than stl lists, but has -the big negative that if you do get a segmentation violation, it is hard to -debug.

-

Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability -to train Tesseract.

-

Tesseract was included in UNLV’s Fourth Annual Test of OCR Accuracy. -See https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf. With Tesseract 2.00, -scripts are now included to allow anyone to reproduce some of these tests. -See https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract for more -details.

-

Tesseract 3.00 adds a number of new languages, including Chinese, Japanese, -and Korean. It also introduces a new, single-file based system of managing -language data.

-

Tesseract 3.02 adds BiDirectional text support, the ability to recognize -multiple languages in a single image, and improved layout analysis.

-

For further details, see the file ReleaseNotes included with the distribution.

-
-
- -
-

SEE ALSO

-
-

ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1), -shape_training(1), mftraining(1), unicharambigs(5), unicharset(5), -unicharset_extractor(1), wordlist2dawg(1)

-
-
-
-

AUTHOR

-
-

Tesseract development was led at Hewlett-Packard and Google by Ray Smith. -The development team has included:

-

Ahmad Abdulkader, Chris Newton, Dan Johnson, Dar-Shyang Lee, David Eger, -Eric Wiseblatt, Faisal Shafait, Hiroshi Takenaka, Joe Liu, Joern Wanke, -Mark Seaman, Mickey Namiki, Nicholas Beato, Oded Fuhrmann, Phil Cheatle, -Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel -Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh -Lloyd, Shobhit Saxena, and Thomas Kielbus.

-
-
-
-

COPYING

-
-

Licensed under the Apache License, Version 2.0

-
-
-
-

- - - + + + + + +TESSERACT(1) + + + + + +
+
+

SYNOPSIS

+
+

tesseract imagename|stdin outputbase|stdout [options…] [configfile…]

+
+
+
+

DESCRIPTION

+
+

tesseract(1) is a commercial quality OCR engine originally developed at HP +between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by +UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed +at Google since then.

+
+
+
+

IN/OUT ARGUMENTS

+
+
+
+imagename +
+
+

+ The name of the input image. Most image file formats (anything + readable by Leptonica) are supported. +

+
+
+stdin +
+
+

+ Instruction to read data from standard input +

+
+
+outputbase +
+
+

+ The basename of the output file (to which the appropriate extension + will be appended). By default the output will be named outbase.txt. +

+
+
+stdout +
+
+

+ Instruction to sent output data to standard output +

+
+
+
+
+
+

OPTIONS

+
+
+
+--tessdata-dir /path +
+
+

+ Specify the location of tessdata path +

+
+
+--user-words /path/to/file +
+
+

+ Specify the location of user words file +

+
+
+--user-patterns /path/to/file specify +
+
+

+ The location of user patterns file +

+
+
+-c configvar=value +
+
+

+ Set value for control parameter. Multiple -c arguments are allowed. +

+
+
+-l lang +
+
+

+ The language to use. If none is specified, English is assumed. + Multiple languages may be specified, separated by plus characters. + Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES) +

+
+
+--psm N +
+
+

+ Set Tesseract to only run a subset of layout analysis and assume + a certain form of image. The options for N are: +

+
+
+
0 = Orientation and script detection (OSD) only.
+1 = Automatic page segmentation with OSD.
+2 = Automatic page segmentation, but no OSD, or OCR.
+3 = Fully automatic page segmentation, but no OSD. (Default)
+4 = Assume a single column of text of variable sizes.
+5 = Assume a single uniform block of vertically aligned text.
+6 = Assume a single uniform block of text.
+7 = Treat the image as a single text line.
+8 = Treat the image as a single word.
+9 = Treat the image as a single word in a circle.
+10 = Treat the image as a single character.
+
+
+
+configfile +
+
+

+ The name of a config to use. A config is a plaintext file which + contains a list of variables and their values, one per line, with a + space separating variable from value. Interesting config files + include:
+

+
    +
  • +

    +hocr - Output in hOCR format instead of as a text file. +

    +
  • +
  • +

    +pdf - Output in pdf instead of a text file. +

    +
  • +
+
+
+

Nota Bene: The options -l lang and --psm N must occur +before any configfile.

+
+
+
+

SINGLE OPTIONS

+
+
+
+-v +
+
+

+ Returns the current version of the tesseract(1) executable. +

+
+
+--list-langs +
+
+

+ list available languages for tesseract engine. Can be used with --tessdata-dir. +

+
+
+--print-parameters +
+
+

+ print tesseract parameters to the stdout. +

+
+
+
+
+
+

LANGUAGES

+
+

There are currently language packs available for the following languages +(in https://github.com/tesseract-ocr/tessdata):

+

afr (Afrikaans) +amh (Amharic) +ara (Arabic) +asm (Assamese) +aze (Azerbaijani) +aze_cyrl (Azerbaijani - Cyrilic) +bel (Belarusian) +ben (Bengali) +bod (Tibetan) +bos (Bosnian) +bul (Bulgarian) +cat (Catalan; Valencian) +ceb (Cebuano) +ces (Czech) +chi_sim (Chinese - Simplified) +chi_tra (Chinese - Traditional) +chr (Cherokee) +cym (Welsh) +dan (Danish) +dan_frak (Danish - Fraktur) +deu (German) +deu_frak (German - Fraktur) +dzo (Dzongkha) +ell (Greek, Modern (1453-)) +eng (English) +enm (English, Middle (1100-1500)) +epo (Esperanto) +equ (Math / equation detection module) +est (Estonian) +eus (Basque) +fas (Persian) +fin (Finnish) +fra (French) +frk (Frankish) +frm (French, Middle (ca.1400-1600)) +gle (Irish) +glg (Galician) +grc (Greek, Ancient (to 1453)) +guj (Gujarati) +hat (Haitian; Haitian Creole) +heb (Hebrew) +hin (Hindi) +hrv (Croatian) +hun (Hungarian) +iku (Inuktitut) +ind (Indonesian) +isl (Icelandic) +ita (Italian) +ita_old (Italian - Old) +jav (Javanese) +jpn (Japanese) +kan (Kannada) +kat (Georgian) +kat_old (Georgian - Old) +kaz (Kazakh) +khm (Central Khmer) +kir (Kirghiz; Kyrgyz) +kor (Korean) +kur (Kurdish) +lao (Lao) +lat (Latin) +lav (Latvian) +lit (Lithuanian) +mal (Malayalam) +mar (Marathi) +mkd (Macedonian) +mlt (Maltese) +msa (Malay) +mya (Burmese) +nep (Nepali) +nld (Dutch; Flemish) +nor (Norwegian) +ori (Oriya) +osd (Orientation and script detection module) +pan (Panjabi; Punjabi) +pol (Polish) +por (Portuguese) +pus (Pushto; Pashto) +ron (Romanian; Moldavian; Moldovan) +rus (Russian) +san (Sanskrit) +sin (Sinhala; Sinhalese) +slk (Slovak) +slk_frak (Slovak - Fraktur) +slv (Slovenian) +spa (Spanish; Castilian) +spa_old (Spanish; Castilian - Old) +sqi (Albanian) +srp (Serbian) +srp_latn (Serbian - Latin) +swa (Swahili) +swe (Swedish) +syr (Syriac) +tam (Tamil) +tel (Telugu) +tgk (Tajik) +tgl (Tagalog) +tha (Thai) +tir (Tigrinya) +tur (Turkish) +uig (Uighur; Uyghur) +ukr (Ukrainian) +urd (Urdu) +uzb (Uzbek) +uzb_cyrl (Uzbek - Cyrilic) +vie 
(Vietnamese) +yid (Yiddish)

+

To use a non-standard language pack named foo.traineddata, set the +TESSDATA_PREFIX environment variable so the file can be found at +TESSDATA_PREFIX/tessdata/foo.traineddata and give Tesseract the +argument -l foo.

+
+
+
+

CONFIG FILES AND AUGMENTING WITH USER DATA

+
+

Tesseract config files consist of lines with variable-value pairs (space +separated). The variables are documented as flags in the source code like +the following one in tesseractclass.h:

+

STRING_VAR_H(tessedit_char_blacklist, "", + "Blacklist of chars not to recognize");

+

These variables may enable or disable various features of the engine, and +may cause it to load (or not load) various data. For instance, let’s suppose +you want to OCR in English, but suppress the normal dictionary and load an +alternative word list and an alternative list of patterns — these two files +are the most commonly used extra data files.

+

If your language pack is in /path/to/eng.traineddata and the hocr config +is in /path/to/configs/hocr then create three new files:

+

/path/to/eng.user-words:

+
+
the
+quick
+brown
+fox
+jumped
+
+
+

/path/to/eng.user-patterns:

+
+
1-\d\d\d-GOOG-411
+www.\n\\\*.com
+
+
+

/path/to/configs/bazaar:

+
+
load_system_dawg     F
+load_freq_dawg       F
+user_words_suffix    user-words
+user_patterns_suffix user-patterns
+
+
+

Now, if you pass the word bazaar as a trailing command line parameter +to Tesseract, Tesseract will not bother loading the system dictionary nor +the dictionary of frequent words and will load and use the eng.user-words +and eng.user-patterns files you provided. The former is a simple word list, +one per line. The format of the latter is documented in dict/trie.h +on read_pattern_list().

+
+
+
+

HISTORY

+
+

The engine was developed at Hewlett Packard Laboratories Bristol and at +Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more +changes made in 1996 to port to Windows, and some C++izing in 1998. A +lot of the code was written in C, and then some more was written in C++. +The C++ code makes heavy use of a list system using macros. This predates +stl, was portable before stl, and is more efficient than stl lists, but has +the big negative that if you do get a segmentation violation, it is hard to +debug.

+

Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability +to train Tesseract.

+

Tesseract was included in UNLV’s Fourth Annual Test of OCR Accuracy. +See https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf. With Tesseract 2.00, +scripts are now included to allow anyone to reproduce some of these tests. +See https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract for more +details.

+

Tesseract 3.00 adds a number of new languages, including Chinese, Japanese, +and Korean. It also introduces a new, single-file based system of managing +language data.

+

Tesseract 3.02 adds BiDirectional text support, the ability to recognize +multiple languages in a single image, and improved layout analysis.

+

For further details, see the file ReleaseNotes included with the distribution.

+
+
+ +
+

SEE ALSO

+
+

ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1), +shape_training(1), mftraining(1), unicharambigs(5), unicharset(5), +unicharset_extractor(1), wordlist2dawg(1)

+
+
+
+

AUTHOR

+
+

Tesseract development was led at Hewlett-Packard and Google by Ray Smith. +The development team has included:

+

Ahmad Abdulkader, Chris Newton, Dan Johnson, Dar-Shyang Lee, David Eger, +Eric Wiseblatt, Faisal Shafait, Hiroshi Takenaka, Joe Liu, Joern Wanke, +Mark Seaman, Mickey Namiki, Nicholas Beato, Oded Fuhrmann, Phil Cheatle, +Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel +Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh +Lloyd, Shobhit Saxena, and Thomas Kielbus.

+
+
+
+

COPYING

+
+

Licensed under the Apache License, Version 2.0

+
+
+
+

+ + + diff --git a/doc/tesseract.1.xml b/doc/tesseract.1.xml index 842c5acd61..8ddce87cd6 100644 --- a/doc/tesseract.1.xml +++ b/doc/tesseract.1.xml @@ -1,424 +1,424 @@ - - - - - - - TESSERACT(1) - - -tesseract -1 -  -  - - - tesseract - command-line OCR engine - - -tesseract imagename|stdin outputbase|stdout [options…] [configfile…] - - -DESCRIPTION -tesseract(1) is a commercial quality OCR engine originally developed at HP -between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by -UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed -at Google since then. - - -IN/OUT ARGUMENTS - - - -imagename - - - - The name of the input image. Most image file formats (anything - readable by Leptonica) are supported. - - - - - -stdin - - - - Instruction to read data from standard input - - - - - -outputbase - - - - The basename of the output file (to which the appropriate extension - will be appended). By default the output will be named outbase.txt. - - - - - -stdout - - - - Instruction to sent output data to standard output - - - - - - -OPTIONS - - - ---tessdata-dir /path - - - - Specify the location of tessdata path - - - - - ---user-words /path/to/file - - - - Specify the location of user words file - - - - - ---user-patterns /path/to/file specify - - - - The location of user patterns file - - - - - --c configvar=value - - - - Set value for control parameter. Multiple -c arguments are allowed. - - - - - --l lang - - - - The language to use. If none is specified, English is assumed. - Multiple languages may be specified, separated by plus characters. - Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES) - - - - - ---psm N - - - - Set Tesseract to only run a subset of layout analysis and assume - a certain form of image. The options for N are: - -0 = Orientation and script detection (OSD) only. -1 = Automatic page segmentation with OSD. -2 = Automatic page segmentation, but no OSD, or OCR. 
-3 = Fully automatic page segmentation, but no OSD. (Default) -4 = Assume a single column of text of variable sizes. -5 = Assume a single uniform block of vertically aligned text. -6 = Assume a single uniform block of text. -7 = Treat the image as a single text line. -8 = Treat the image as a single word. -9 = Treat the image as a single word in a circle. -10 = Treat the image as a single character. - - - - -configfile - - - - The name of a config to use. A config is a plaintext file which - contains a list of variables and their values, one per line, with a - space separating variable from value. Interesting config files - include: - - - - -hocr - Output in hOCR format instead of as a text file. - - - - -pdf - Output in pdf instead of a text file. - - - - - - -Nota Bene: The options -l lang and --psm N must occur -before any configfile. - - -SINGLE OPTIONS - - - --v - - - - Returns the current version of the tesseract(1) executable. - - - - - ---list-langs - - - - list available languages for tesseract engine. Can be used with --tessdata-dir. - - - - - ---print-parameters - - - - print tesseract parameters to the stdout. 
- - - - - - -LANGUAGES -There are currently language packs available for the following languages -(in https://github.com/tesseract-ocr/tessdata): -afr (Afrikaans) -amh (Amharic) -ara (Arabic) -asm (Assamese) -aze (Azerbaijani) -aze_cyrl (Azerbaijani - Cyrilic) -bel (Belarusian) -ben (Bengali) -bod (Tibetan) -bos (Bosnian) -bul (Bulgarian) -cat (Catalan; Valencian) -ceb (Cebuano) -ces (Czech) -chi_sim (Chinese - Simplified) -chi_tra (Chinese - Traditional) -chr (Cherokee) -cym (Welsh) -dan (Danish) -dan_frak (Danish - Fraktur) -deu (German) -deu_frak (German - Fraktur) -dzo (Dzongkha) -ell (Greek, Modern (1453-)) -eng (English) -enm (English, Middle (1100-1500)) -epo (Esperanto) -equ (Math / equation detection module) -est (Estonian) -eus (Basque) -fas (Persian) -fin (Finnish) -fra (French) -frk (Frankish) -frm (French, Middle (ca.1400-1600)) -gle (Irish) -glg (Galician) -grc (Greek, Ancient (to 1453)) -guj (Gujarati) -hat (Haitian; Haitian Creole) -heb (Hebrew) -hin (Hindi) -hrv (Croatian) -hun (Hungarian) -iku (Inuktitut) -ind (Indonesian) -isl (Icelandic) -ita (Italian) -ita_old (Italian - Old) -jav (Javanese) -jpn (Japanese) -kan (Kannada) -kat (Georgian) -kat_old (Georgian - Old) -kaz (Kazakh) -khm (Central Khmer) -kir (Kirghiz; Kyrgyz) -kor (Korean) -kur (Kurdish) -lao (Lao) -lat (Latin) -lav (Latvian) -lit (Lithuanian) -mal (Malayalam) -mar (Marathi) -mkd (Macedonian) -mlt (Maltese) -msa (Malay) -mya (Burmese) -nep (Nepali) -nld (Dutch; Flemish) -nor (Norwegian) -ori (Oriya) -osd (Orientation and script detection module) -pan (Panjabi; Punjabi) -pol (Polish) -por (Portuguese) -pus (Pushto; Pashto) -ron (Romanian; Moldavian; Moldovan) -rus (Russian) -san (Sanskrit) -sin (Sinhala; Sinhalese) -slk (Slovak) -slk_frak (Slovak - Fraktur) -slv (Slovenian) -spa (Spanish; Castilian) -spa_old (Spanish; Castilian - Old) -sqi (Albanian) -srp (Serbian) -srp_latn (Serbian - Latin) -swa (Swahili) -swe (Swedish) -syr (Syriac) -tam (Tamil) -tel (Telugu) -tgk (Tajik) -tgl 
(Tagalog) -tha (Thai) -tir (Tigrinya) -tur (Turkish) -uig (Uighur; Uyghur) -ukr (Ukrainian) -urd (Urdu) -uzb (Uzbek) -uzb_cyrl (Uzbek - Cyrilic) -vie (Vietnamese) -yid (Yiddish) -To use a non-standard language pack named foo.traineddata, set the -TESSDATA_PREFIX environment variable so the file can be found at -TESSDATA_PREFIX/tessdata/foo.traineddata and give Tesseract the -argument -l foo. - - -CONFIG FILES AND AUGMENTING WITH USER DATA -Tesseract config files consist of lines with variable-value pairs (space -separated). The variables are documented as flags in the source code like -the following one in tesseractclass.h: -STRING_VAR_H(tessedit_char_blacklist, "", - "Blacklist of chars not to recognize"); -These variables may enable or disable various features of the engine, and -may cause it to load (or not load) various data. For instance, let’s suppose -you want to OCR in English, but suppress the normal dictionary and load an -alternative word list and an alternative list of patterns — these two files -are the most commonly used extra data files. -If your language pack is in /path/to/eng.traineddata and the hocr config -is in /path/to/configs/hocr then create three new files: -/path/to/eng.user-words: -
-the -quick -brown -fox -jumped -
-/path/to/eng.user-patterns: -
-1-\d\d\d-GOOG-411 -www.\n\\\*.com -
-/path/to/configs/bazaar: -
-load_system_dawg F -load_freq_dawg F -user_words_suffix user-words -user_patterns_suffix user-patterns -
-Now, if you pass the word bazaar as a trailing command line parameter -to Tesseract, Tesseract will not bother loading the system dictionary nor -the dictionary of frequent words and will load and use the eng.user-words -and eng.user-patterns files you provided. The former is a simple word list, -one per line. The format of the latter is documented in dict/trie.h -on read_pattern_list(). -
- -HISTORY -The engine was developed at Hewlett Packard Laboratories Bristol and at -Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more -changes made in 1996 to port to Windows, and some C++izing in 1998. A -lot of the code was written in C, and then some more was written in C++. -The C\++ code makes heavy use of a list system using macros. This predates -stl, was portable before stl, and is more efficient than stl lists, but has -the big negative that if you do get a segmentation violation, it is hard to -debug. -Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability -to train Tesseract. -Tesseract was included in UNLV’s Fourth Annual Test of OCR Accuracy. -See https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf. With Tesseract 2.00, -scripts are now included to allow anyone to reproduce some of these tests. -See https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract for more -details. -Tesseract 3.00 adds a number of new languages, including Chinese, Japanese, -and Korean. It also introduces a new, single-file based system of managing -language data. -Tesseract 3.02 adds BiDirectional text support, the ability to recognize -multiple languages in a single image, and improved layout analysis. -For further details, see the file ReleaseNotes included with the distribution. - - -RESOURCES -Main web site: https://github.com/tesseract-ocr -Information on training: https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract - - -SEE ALSO -ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1), -shape_training(1), mftraining(1), unicharambigs(5), unicharset(5), -unicharset_extractor(1), wordlist2dawg(1) - - -AUTHOR -Tesseract development was led at Hewlett-Packard and Google by Ray Smith. 
-The development team has included: -Ahmad Abdulkader, Chris Newton, Dan Johnson, Dar-Shyang Lee, David Eger, -Eric Wiseblatt, Faisal Shafait, Hiroshi Takenaka, Joe Liu, Joern Wanke, -Mark Seaman, Mickey Namiki, Nicholas Beato, Oded Fuhrmann, Phil Cheatle, -Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel -Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh -Lloyd, Shobhit Saxena, and Thomas Kielbus. - - -COPYING -Licensed under the Apache License, Version 2.0 - -
+ + + + + + + TESSERACT(1) + + +tesseract +1 +  +  + + + tesseract + command-line OCR engine + + +tesseract imagename|stdin outputbase|stdout [options…] [configfile…] + + +DESCRIPTION +tesseract(1) is a commercial quality OCR engine originally developed at HP +between 1985 and 1995. In 1995, this engine was among the top 3 evaluated by +UNLV. It was open-sourced by HP and UNLV in 2005, and has been developed +at Google since then. + + +IN/OUT ARGUMENTS + + + +imagename + + + + The name of the input image. Most image file formats (anything + readable by Leptonica) are supported. + + + + + +stdin + + + + Instruction to read data from standard input + + + + + +outputbase + + + + The basename of the output file (to which the appropriate extension + will be appended). By default the output will be named outbase.txt. + + + + + +stdout + + + + Instruction to sent output data to standard output + + + + + + +OPTIONS + + + +--tessdata-dir /path + + + + Specify the location of tessdata path + + + + + +--user-words /path/to/file + + + + Specify the location of user words file + + + + + +--user-patterns /path/to/file specify + + + + The location of user patterns file + + + + + +-c configvar=value + + + + Set value for control parameter. Multiple -c arguments are allowed. + + + + + +-l lang + + + + The language to use. If none is specified, English is assumed. + Multiple languages may be specified, separated by plus characters. + Tesseract uses 3-character ISO 639-2 language codes. (See LANGUAGES) + + + + + +--psm N + + + + Set Tesseract to only run a subset of layout analysis and assume + a certain form of image. The options for N are: + +0 = Orientation and script detection (OSD) only. +1 = Automatic page segmentation with OSD. +2 = Automatic page segmentation, but no OSD, or OCR. +3 = Fully automatic page segmentation, but no OSD. (Default) +4 = Assume a single column of text of variable sizes. +5 = Assume a single uniform block of vertically aligned text. 
+6 = Assume a single uniform block of text. +7 = Treat the image as a single text line. +8 = Treat the image as a single word. +9 = Treat the image as a single word in a circle. +10 = Treat the image as a single character. + + + + +configfile + + + + The name of a config to use. A config is a plaintext file which + contains a list of variables and their values, one per line, with a + space separating variable from value. Interesting config files + include: + + + + +hocr - Output in hOCR format instead of as a text file. + + + + +pdf - Output in pdf instead of a text file. + + + + + + +Nota Bene: The options -l lang and --psm N must occur +before any configfile. + + +SINGLE OPTIONS + + + +-v + + + + Returns the current version of the tesseract(1) executable. + + + + + +--list-langs + + + + list available languages for tesseract engine. Can be used with --tessdata-dir. + + + + + +--print-parameters + + + + print tesseract parameters to the stdout. + + + + + + +LANGUAGES +There are currently language packs available for the following languages +(in https://github.com/tesseract-ocr/tessdata): +afr (Afrikaans) +amh (Amharic) +ara (Arabic) +asm (Assamese) +aze (Azerbaijani) +aze_cyrl (Azerbaijani - Cyrilic) +bel (Belarusian) +ben (Bengali) +bod (Tibetan) +bos (Bosnian) +bul (Bulgarian) +cat (Catalan; Valencian) +ceb (Cebuano) +ces (Czech) +chi_sim (Chinese - Simplified) +chi_tra (Chinese - Traditional) +chr (Cherokee) +cym (Welsh) +dan (Danish) +dan_frak (Danish - Fraktur) +deu (German) +deu_frak (German - Fraktur) +dzo (Dzongkha) +ell (Greek, Modern (1453-)) +eng (English) +enm (English, Middle (1100-1500)) +epo (Esperanto) +equ (Math / equation detection module) +est (Estonian) +eus (Basque) +fas (Persian) +fin (Finnish) +fra (French) +frk (Frankish) +frm (French, Middle (ca.1400-1600)) +gle (Irish) +glg (Galician) +grc (Greek, Ancient (to 1453)) +guj (Gujarati) +hat (Haitian; Haitian Creole) +heb (Hebrew) +hin (Hindi) +hrv (Croatian) +hun (Hungarian) +iku (Inuktitut) 
+ind (Indonesian) +isl (Icelandic) +ita (Italian) +ita_old (Italian - Old) +jav (Javanese) +jpn (Japanese) +kan (Kannada) +kat (Georgian) +kat_old (Georgian - Old) +kaz (Kazakh) +khm (Central Khmer) +kir (Kirghiz; Kyrgyz) +kor (Korean) +kur (Kurdish) +lao (Lao) +lat (Latin) +lav (Latvian) +lit (Lithuanian) +mal (Malayalam) +mar (Marathi) +mkd (Macedonian) +mlt (Maltese) +msa (Malay) +mya (Burmese) +nep (Nepali) +nld (Dutch; Flemish) +nor (Norwegian) +ori (Oriya) +osd (Orientation and script detection module) +pan (Panjabi; Punjabi) +pol (Polish) +por (Portuguese) +pus (Pushto; Pashto) +ron (Romanian; Moldavian; Moldovan) +rus (Russian) +san (Sanskrit) +sin (Sinhala; Sinhalese) +slk (Slovak) +slk_frak (Slovak - Fraktur) +slv (Slovenian) +spa (Spanish; Castilian) +spa_old (Spanish; Castilian - Old) +sqi (Albanian) +srp (Serbian) +srp_latn (Serbian - Latin) +swa (Swahili) +swe (Swedish) +syr (Syriac) +tam (Tamil) +tel (Telugu) +tgk (Tajik) +tgl (Tagalog) +tha (Thai) +tir (Tigrinya) +tur (Turkish) +uig (Uighur; Uyghur) +ukr (Ukrainian) +urd (Urdu) +uzb (Uzbek) +uzb_cyrl (Uzbek - Cyrilic) +vie (Vietnamese) +yid (Yiddish) +To use a non-standard language pack named foo.traineddata, set the +TESSDATA_PREFIX environment variable so the file can be found at +TESSDATA_PREFIX/tessdata/foo.traineddata and give Tesseract the +argument -l foo. + + +CONFIG FILES AND AUGMENTING WITH USER DATA +Tesseract config files consist of lines with variable-value pairs (space +separated). The variables are documented as flags in the source code like +the following one in tesseractclass.h: +STRING_VAR_H(tessedit_char_blacklist, "", + "Blacklist of chars not to recognize"); +These variables may enable or disable various features of the engine, and +may cause it to load (or not load) various data. 
For instance, let’s suppose +you want to OCR in English, but suppress the normal dictionary and load an +alternative word list and an alternative list of patterns — these two files +are the most commonly used extra data files. +If your language pack is in /path/to/eng.traineddata and the hocr config +is in /path/to/configs/hocr then create three new files: +/path/to/eng.user-words: +
+the +quick +brown +fox +jumped +
+/path/to/eng.user-patterns: +
+1-\d\d\d-GOOG-411 +www.\n\\\*.com +
+/path/to/configs/bazaar: +
+load_system_dawg F +load_freq_dawg F +user_words_suffix user-words +user_patterns_suffix user-patterns +
+Now, if you pass the word bazaar as a trailing command line parameter +to Tesseract, Tesseract will not bother loading the system dictionary nor +the dictionary of frequent words and will load and use the eng.user-words +and eng.user-patterns files you provided. The former is a simple word list, +one per line. The format of the latter is documented in dict/trie.h +on read_pattern_list(). +
+ +HISTORY +The engine was developed at Hewlett Packard Laboratories Bristol and at +Hewlett Packard Co, Greeley Colorado between 1985 and 1994, with some more +changes made in 1996 to port to Windows, and some C++izing in 1998. A +lot of the code was written in C, and then some more was written in C++. +The C\++ code makes heavy use of a list system using macros. This predates +stl, was portable before stl, and is more efficient than stl lists, but has +the big negative that if you do get a segmentation violation, it is hard to +debug. +Version 2.00 brought Unicode (UTF-8) support, six languages, and the ability +to train Tesseract. +Tesseract was included in UNLV’s Fourth Annual Test of OCR Accuracy. +See https://github.com/tesseract-ocr/docs/blob/master/AT-1995.pdf. With Tesseract 2.00, +scripts are now included to allow anyone to reproduce some of these tests. +See https://github.com/tesseract-ocr/tesseract/wiki/TestingTesseract for more +details. +Tesseract 3.00 adds a number of new languages, including Chinese, Japanese, +and Korean. It also introduces a new, single-file based system of managing +language data. +Tesseract 3.02 adds BiDirectional text support, the ability to recognize +multiple languages in a single image, and improved layout analysis. +For further details, see the file ReleaseNotes included with the distribution. + + +RESOURCES +Main web site: https://github.com/tesseract-ocr +Information on training: https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract + + +SEE ALSO +ambiguous_words(1), cntraining(1), combine_tessdata(1), dawg2wordlist(1), +shape_training(1), mftraining(1), unicharambigs(5), unicharset(5), +unicharset_extractor(1), wordlist2dawg(1) + + +AUTHOR +Tesseract development was led at Hewlett-Packard and Google by Ray Smith. 
+The development team has included: +Ahmad Abdulkader, Chris Newton, Dan Johnson, Dar-Shyang Lee, David Eger, +Eric Wiseblatt, Faisal Shafait, Hiroshi Takenaka, Joe Liu, Joern Wanke, +Mark Seaman, Mickey Namiki, Nicholas Beato, Oded Fuhrmann, Phil Cheatle, +Pingping Xiu, Pong Eksombatchai (Chantat), Ranjith Unnikrishnan, Raquel +Romano, Ray Smith, Rika Antonova, Robert Moss, Samuel Charron, Sheelagh +Lloyd, Shobhit Saxena, and Thomas Kielbus. + + +COPYING +Licensed under the Apache License, Version 2.0 + +
diff --git a/doc/unicharambigs.5.asc b/doc/unicharambigs.5.asc index 7ce25e4478..079f6d53de 100644 --- a/doc/unicharambigs.5.asc +++ b/doc/unicharambigs.5.asc @@ -38,7 +38,7 @@ EXAMPLE 3 i i i 1 m 0 ............................... -In this example, all instances of the '2' character sequence '''' will +In this example, all instances of the '2' character sequence '''' will *always* be replaced by the '1' character sequence '"'; a '1' character sequence 'm' *may* be replaced by the '2' character sequence 'rn', and the '3' character sequence *may* be replaced by the '1' character diff --git a/doc/unicharambigs.5.html b/doc/unicharambigs.5.html index c6a645e69c..bb9fb291a3 100644 --- a/doc/unicharambigs.5.html +++ b/doc/unicharambigs.5.html @@ -1,875 +1,875 @@ - - - - - -UNICHARAMBIGS(5) - - - - - -
-
-

DESCRIPTION

-
-

The unicharambigs file (a component of traineddata, see combine_tessdata(1) ) -is used by Tesseract to represent possible ambiguities between characters, -or groups of characters.

-

The file contains a number of lines, laid out as follow:

-
-
-
[num] <TAB> [char(s)] <TAB> [num] <TAB> [char(s)] <TAB> [num]
-
-
- - - - - - - - - - - - - - - - - - - - -
-Field one -
-
-

-the number of characters contained in field two -

-
-Field two -
-
-

-the character sequence to be replaced -

-
-Field three -
-
-

-the number of characters contained in field four -

-
-Field four -
-
-

-the character sequence used to replace field two -

-
-Field five -
-
-

-contains either 1 or 0. 1 denotes a mandatory -replacement, 0 denotes an optional replacement. -

-
-

Characters appearing in fields two and four should appear in -unicharset. The numbers in fields one and three refer to the -number of unichars (not bytes).

-
-
-
-

EXAMPLE

-
-
-
-
2       ' '     1       "     1
-1       m       2       r n   0
-3       i i i   1       m     0
-
-

In this example, all instances of the 2 character sequence '' will -always be replaced by the 1 character sequence "; a 1 character -sequence m may be replaced by the 2 character sequence rn, and -the 3 character sequence may be replaced by the 1 character -sequence m.

-
-
-
-

HISTORY

-
-

The unicharambigs file first appeared in Tesseract 3.00; prior to that, a -similar format, called DangAmbigs (dangerous ambiguities) was used: the -format was almost identical, except only mandatory replacements could be -specified, and field 5 was absent.

-
-
-
-

BUGS

-
-

This is a documentation "bug": it’s not currently clear what should be done -in the case of ligatures (such as fi) which may also appear as regular -letters in the unicharset.

-
-
-
-

SEE ALSO

-
-

tesseract(1), unicharset(5)

-
-
-
-

AUTHOR

-
-

The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present).

-
-
-
-

- - - + + + + + +UNICHARAMBIGS(5) + + + + + +
+
+

DESCRIPTION

+
+

The unicharambigs file (a component of traineddata, see combine_tessdata(1) ) +is used by Tesseract to represent possible ambiguities between characters, +or groups of characters.

+

The file contains a number of lines, laid out as follows:

+
+
+
[num] <TAB> [char(s)] <TAB> [num] <TAB> [char(s)] <TAB> [num]
+
+
+ + + + + + + + + + + + + + + + + + + + +
+Field one +
+
+

+the number of characters contained in field two +

+
+Field two +
+
+

+the character sequence to be replaced +

+
+Field three +
+
+

+the number of characters contained in field four +

+
+Field four +
+
+

+the character sequence used to replace field two +

+
+Field five +
+
+

+contains either 1 or 0. 1 denotes a mandatory +replacement, 0 denotes an optional replacement. +

+
+

Characters appearing in fields two and four should appear in +unicharset. The numbers in fields one and three refer to the +number of unichars (not bytes).

+
+
+
+

EXAMPLE

+
+
+
+
2       ' '     1       "     1
+1       m       2       r n   0
+3       i i i   1       m     0
+
+

In this example, all instances of the 2 character sequence '' will +always be replaced by the 1 character sequence "; a 1 character +sequence m may be replaced by the 2 character sequence rn, and +the 3 character sequence iii may be replaced by the 1 character +sequence m.

+
+
+
+

HISTORY

+
+

The unicharambigs file first appeared in Tesseract 3.00; prior to that, a +similar format, called DangAmbigs (dangerous ambiguities) was used: the +format was almost identical, except only mandatory replacements could be +specified, and field 5 was absent.

+
+
+
+

BUGS

+
+

This is a documentation "bug": it’s not currently clear what should be done +in the case of ligatures (such as fi) which may also appear as regular +letters in the unicharset.

+
+
+
+

SEE ALSO

+
+

tesseract(1), unicharset(5)

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/doc/unicharambigs.5.xml b/doc/unicharambigs.5.xml index 75b3c66431..cbc0f50e50 100644 --- a/doc/unicharambigs.5.xml +++ b/doc/unicharambigs.5.xml @@ -1,126 +1,126 @@ - - - - - - - UNICHARAMBIGS(5) - - -unicharambigs -5 -  -  - - - unicharambigs - Tesseract unicharset ambiguities - - -DESCRIPTION -The unicharambigs file (a component of traineddata, see combine_tessdata(1) ) -is used by Tesseract to represent possible ambiguities between characters, -or groups of characters. -The file contains a number of lines, laid out as follow: -[num] <TAB> [char(s)] <TAB> [num] <TAB> [char(s)] <TAB> [num] - - - - -Field one - - - - -the number of characters contained in field two - - - - - - -Field two - - - - -the character sequence to be replaced - - - - - - -Field three - - - - -the number of characters contained in field four - - - - - - -Field four - - - - -the character sequence used to replace field two - - - - - - -Field five - - - - -contains either 1 or 0. 1 denotes a mandatory -replacement, 0 denotes an optional replacement. - - - - -Characters appearing in fields two and four should appear in -unicharset. The numbers in fields one and three refer to the -number of unichars (not bytes). - - -EXAMPLE -2 ' ' 1 " 1 -1 m 2 r n 0 -3 i i i 1 m 0 -In this example, all instances of the 2 character sequence '' will -always be replaced by the 1 character sequence "; a 1 character -sequence m may be replaced by the 2 character sequence rn, and -the 3 character sequence may be replaced by the 1 character -sequence m. - - -HISTORY -The unicharambigs file first appeared in Tesseract 3.00; prior to that, a -similar format, called DangAmbigs (dangerous ambiguities) was used: the -format was almost identical, except only mandatory replacements could be -specified, and field 5 was absent. 
- - -BUGS -This is a documentation "bug": it’s not currently clear what should be done -in the case of ligatures (such as fi) which may also appear as regular -letters in the unicharset. - - -SEE ALSO -tesseract(1), unicharset(5) - - -AUTHOR -The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present). - - + + + + + + + UNICHARAMBIGS(5) + + +unicharambigs +5 +  +  + + + unicharambigs + Tesseract unicharset ambiguities + + +DESCRIPTION +The unicharambigs file (a component of traineddata, see combine_tessdata(1) ) +is used by Tesseract to represent possible ambiguities between characters, +or groups of characters. +The file contains a number of lines, laid out as follow: +[num] <TAB> [char(s)] <TAB> [num] <TAB> [char(s)] <TAB> [num] + + + + +Field one + + + + +the number of characters contained in field two + + + + + + +Field two + + + + +the character sequence to be replaced + + + + + + +Field three + + + + +the number of characters contained in field four + + + + + + +Field four + + + + +the character sequence used to replace field two + + + + + + +Field five + + + + +contains either 1 or 0. 1 denotes a mandatory +replacement, 0 denotes an optional replacement. + + + + +Characters appearing in fields two and four should appear in +unicharset. The numbers in fields one and three refer to the +number of unichars (not bytes). + + +EXAMPLE +2 ' ' 1 " 1 +1 m 2 r n 0 +3 i i i 1 m 0 +In this example, all instances of the 2 character sequence '' will +always be replaced by the 1 character sequence "; a 1 character +sequence m may be replaced by the 2 character sequence rn, and +the 3 character sequence may be replaced by the 1 character +sequence m. 
+ + +HISTORY +The unicharambigs file first appeared in Tesseract 3.00; prior to that, a +similar format, called DangAmbigs (dangerous ambiguities) was used: the +format was almost identical, except only mandatory replacements could be +specified, and field 5 was absent. + + +BUGS +This is a documentation "bug": it’s not currently clear what should be done +in the case of ligatures (such as fi) which may also appear as regular +letters in the unicharset. + + +SEE ALSO +tesseract(1), unicharset(5) + + +AUTHOR +The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present). + + diff --git a/doc/unicharset.5.html b/doc/unicharset.5.html index 0f16c9e5e5..f3c3e7a9fc 100644 --- a/doc/unicharset.5.html +++ b/doc/unicharset.5.html @@ -1,965 +1,965 @@ - - - - - -UNICHARSET(5) - - - - - -
-
-

DESCRIPTION

-
-

Tesseract’s unicharset file contains information on each symbol -(unichar) the Tesseract OCR engine is trained to recognize.

-

A unicharset file (i.e. eng.unicharset) is distributed as part of a -Tesseract language pack (i.e. eng.traineddata). For information on -extracting the unicharset file, see combine_tessdata(1).

-

The first line of a unicharset file contains the number of unichars in -the file. After this line, each subsequent line provides information for -a single unichar. The first such line contains a placeholder reserved for -the space character. Each unichar is referred to within Tesseract by its -Unichar ID, which is the line number (minus 1) within the unicharset file. -Therefore, space gets unichar 0.

-

Each unichar line in the unicharset file (v2+) may have four space-separated fields:

-
-
-
'character' 'properties' 'script' 'id'
-
-

Starting with Tesseract v3.02, more information may be given for each unichar:

-
-
-
'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'
-
-

Entries:

-
-
-character -
-
-

-The UTF-8 encoded string to be produced for this unichar. -

-
-
-properties -
-
-

-An integer mask of character properties, one per bit. - From least to most significant bit, these are: isalpha, islower, isupper, - isdigit, ispunctuation. -

-
-
-glyph_metrics -
-
-

-Ten comma-separated integers representing various standards - for where this glyph is to be found within a baseline-normalized coordinate - system where 128 is normalized to x-height. -

-
    -
  • -

    -min_bottom, max_bottom: the ranges where the bottom of the character can - be found. -

    -
  • -
  • -

    -min_top, max_top: the ranges where the top of the character may be found. -

    -
  • -
  • -

    -min_width, max_width: horizontal width of the character. -

    -
  • -
  • -

    -min_bearing, max_bearing: how far from the usual start position does the - leftmost part of the character begin. -

    -
  • -
  • -

    -min_advance, max_advance: how far from the printer’s cell left do we - advance to begin the next character. -

    -
  • -
-
-
-script -
-
-

-Name of the script (Latin, Common, Greek, Cyrillic, Han, null). -

-
-
-other_case -
-
-

-The Unichar ID of the other case version of this character - (upper or lower). -

-
-
-direction -
-
-

-The Unicode BiDi direction of this character, as defined by - ICU’s enum UCharDirection. (0 = Left to Right, 1 = Right to Left, - 2 = European Number…) -

-
-
-mirror -
-
-

-The Unichar ID of the BiDirectional mirror of this character. - For example the mirror of open paren is close paren, but Latin Capital C - has no mirror, so it remains a Latin Capital C. -

-
-
-normed_form -
-
-

-The UTF-8 representation of a "normalized form" of this unichar - for the purpose of blaming a module for errors given ground truth text. - For instance, a left or right single quote may normalize to an ASCII quote. -

-
-
-
-
-
-

EXAMPLE (v2)

-
-
-
-
; 10 Common 46
-b 3 Latin 59
-W 5 Latin 40
-7 8 Common 66
-= 0 Common 93
-
-

";" is a punctuation character. Its properties are thus represented by the -binary number 10000 (10 in hexadecimal).

-

"b" is an alphabetic character and a lower case character. Its properties are -thus represented by the binary number 00011 (3 in hexadecimal).

-

"W" is an alphabetic character and an upper case character. Its properties are -thus represented by the binary number 00101 (5 in hexadecimal).

-

"7" is just a digit. Its properties are thus represented by the binary number -01000 (8 in hexadecimal).

-

"=" is not punctuation nor a digit nor an alphabetic character. Its properties -are thus represented by the binary number 00000 (0 in hexadecimal).

-

Japanese or Chinese alphabetic character properties are represented by the -binary number 00001 (1 in hexadecimal): they are alphabetic, but neither -upper nor lower case.

-
-
-
-

EXAMPLE (v3.02)

-
-
-
-
110
-NULL 0 NULL 0
-N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N
-Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y
-1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1
-9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9
-a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a
-. . .
-
-
-
-
-

CAVEATS

-
-

Although the unicharset reader maintains the ability to read unicharsets -of older formats and will assign default values to missing fields, -the accuracy will be degraded.

-

Further, most other data files are indexed by the unicharset file, -so changing it without re-generating the others is likely to have dire -consequences.

-
-
-
-

HISTORY

-
-

The unicharset format first appeared with Tesseract 2.00, which was the -first version to support languages other than English. The unicharset file -contained only the first two fields, and the "ispunctuation" property was -absent (punctuation was regarded as "0", as "=" is in the above example.

-
-
-
-

SEE ALSO

-
-

tesseract(1), combine_tessdata(1), unicharset_extractor(1)

- -
-
-
-

AUTHOR

-
-

The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present).

-
-
-
-

- - - + + + + + +UNICHARSET(5) + + + + + +
+
+

DESCRIPTION

+
+

Tesseract’s unicharset file contains information on each symbol +(unichar) the Tesseract OCR engine is trained to recognize.

+

A unicharset file (e.g. eng.unicharset) is distributed as part of a +Tesseract language pack (e.g. eng.traineddata). For information on +extracting the unicharset file, see combine_tessdata(1).

+

The first line of a unicharset file contains the number of unichars in +the file. After this line, each subsequent line provides information for +a single unichar. The first such line contains a placeholder reserved for +the space character. Each unichar is referred to within Tesseract by its +Unichar ID, which is the line number (minus 1) within the unicharset file. +Therefore, space gets unichar 0.

+

Each unichar line in the unicharset file (v2+) may have four space-separated fields:

+
+
+
'character' 'properties' 'script' 'id'
+
+

Starting with Tesseract v3.02, more information may be given for each unichar:

+
+
+
'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form'
+
+

Entries:

+
+
+character +
+
+

+The UTF-8 encoded string to be produced for this unichar. +

+
+
+properties +
+
+

+An integer mask of character properties, one per bit. + From least to most significant bit, these are: isalpha, islower, isupper, + isdigit, ispunctuation. +

+
+
+glyph_metrics +
+
+

+Ten comma-separated integers representing various standards + for where this glyph is to be found within a baseline-normalized coordinate + system where 128 is normalized to x-height. +

+
    +
  • +

    +min_bottom, max_bottom: the ranges where the bottom of the character can + be found. +

    +
  • +
  • +

    +min_top, max_top: the ranges where the top of the character may be found. +

    +
  • +
  • +

    +min_width, max_width: horizontal width of the character. +

    +
  • +
  • +

    +min_bearing, max_bearing: how far from the usual start position does the + leftmost part of the character begin. +

    +
  • +
  • +

    +min_advance, max_advance: how far from the printer’s cell left do we + advance to begin the next character. +

    +
  • +
+
+
+script +
+
+

+Name of the script (Latin, Common, Greek, Cyrillic, Han, null). +

+
+
+other_case +
+
+

+The Unichar ID of the other case version of this character + (upper or lower). +

+
+
+direction +
+
+

+The Unicode BiDi direction of this character, as defined by + ICU’s enum UCharDirection. (0 = Left to Right, 1 = Right to Left, + 2 = European Number…) +

+
+
+mirror +
+
+

+The Unichar ID of the BiDirectional mirror of this character. + For example the mirror of open paren is close paren, but Latin Capital C + has no mirror, so it remains a Latin Capital C. +

+
+
+normed_form +
+
+

+The UTF-8 representation of a "normalized form" of this unichar + for the purpose of blaming a module for errors given ground truth text. + For instance, a left or right single quote may normalize to an ASCII quote. +

+
+
+
+
+
+

EXAMPLE (v2)

+
+
+
+
; 10 Common 46
+b 3 Latin 59
+W 5 Latin 40
+7 8 Common 66
+= 0 Common 93
+
+

";" is a punctuation character. Its properties are thus represented by the +binary number 10000 (10 in hexadecimal).

+

"b" is an alphabetic character and a lower case character. Its properties are +thus represented by the binary number 00011 (3 in hexadecimal).

+

"W" is an alphabetic character and an upper case character. Its properties are +thus represented by the binary number 00101 (5 in hexadecimal).

+

"7" is just a digit. Its properties are thus represented by the binary number +01000 (8 in hexadecimal).

+

"=" is neither punctuation nor a digit nor an alphabetic character. Its properties +are thus represented by the binary number 00000 (0 in hexadecimal).

+

Japanese or Chinese alphabetic character properties are represented by the +binary number 00001 (1 in hexadecimal): they are alphabetic, but neither +upper nor lower case.

+
+
+
+

EXAMPLE (v3.02)

+
+
+
+
110
+NULL 0 NULL 0
+N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N
+Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y
+1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1
+9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9
+a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a
+. . .
+
+
+
+
+

CAVEATS

+
+

Although the unicharset reader maintains the ability to read unicharsets +of older formats and will assign default values to missing fields, +the accuracy will be degraded.

+

Further, most other data files are indexed by the unicharset file, +so changing it without re-generating the others is likely to have dire +consequences.

+
+
+
+

HISTORY

+
+

The unicharset format first appeared with Tesseract 2.00, which was the +first version to support languages other than English. The unicharset file +contained only the first two fields, and the "ispunctuation" property was +absent (punctuation was regarded as "0", as "=" is in the above example).

+
+
+
+

SEE ALSO

+
+

tesseract(1), combine_tessdata(1), unicharset_extractor(1)

+ +
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/doc/unicharset.5.xml b/doc/unicharset.5.xml index 9ae6257e60..40e03c6eea 100644 --- a/doc/unicharset.5.xml +++ b/doc/unicharset.5.xml @@ -1,219 +1,219 @@ - - - - - - - UNICHARSET(5) - - -unicharset -5 -  -  - - - unicharset - character properties file used by tesseract(1) - - -DESCRIPTION -Tesseract’s unicharset file contains information on each symbol -(unichar) the Tesseract OCR engine is trained to recognize. -A unicharset file (i.e. eng.unicharset) is distributed as part of a -Tesseract language pack (i.e. eng.traineddata). For information on -extracting the unicharset file, see combine_tessdata(1). -The first line of a unicharset file contains the number of unichars in -the file. After this line, each subsequent line provides information for -a single unichar. The first such line contains a placeholder reserved for -the space character. Each unichar is referred to within Tesseract by its -Unichar ID, which is the line number (minus 1) within the unicharset file. -Therefore, space gets unichar 0. -Each unichar line in the unicharset file (v2+) may have four space-separated fields: -'character' 'properties' 'script' 'id' -Starting with Tesseract v3.02, more information may be given for each unichar: -'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form' -Entries: - - - -character - - - -The UTF-8 encoded string to be produced for this unichar. - - - - - -properties - - - -An integer mask of character properties, one per bit. - From least to most significant bit, these are: isalpha, islower, isupper, - isdigit, ispunctuation. - - - - - -glyph_metrics - - - -Ten comma-separated integers representing various standards - for where this glyph is to be found within a baseline-normalized coordinate - system where 128 is normalized to x-height. - - - - -min_bottom, max_bottom: the ranges where the bottom of the character can - be found. 
- - - - -min_top, max_top: the ranges where the top of the character may be found. - - - - -min_width, max_width: horizontal width of the character. - - - - -min_bearing, max_bearing: how far from the usual start position does the - leftmost part of the character begin. - - - - -min_advance, max_advance: how far from the printer’s cell left do we - advance to begin the next character. - - - - - - - -script - - - -Name of the script (Latin, Common, Greek, Cyrillic, Han, null). - - - - - -other_case - - - -The Unichar ID of the other case version of this character - (upper or lower). - - - - - -direction - - - -The Unicode BiDi direction of this character, as defined by - ICU’s enum UCharDirection. (0 = Left to Right, 1 = Right to Left, - 2 = European Number…) - - - - - -mirror - - - -The Unichar ID of the BiDirectional mirror of this character. - For example the mirror of open paren is close paren, but Latin Capital C - has no mirror, so it remains a Latin Capital C. - - - - - -normed_form - - - -The UTF-8 representation of a "normalized form" of this unichar - for the purpose of blaming a module for errors given ground truth text. - For instance, a left or right single quote may normalize to an ASCII quote. - - - - - - -EXAMPLE (v2) -; 10 Common 46 -b 3 Latin 59 -W 5 Latin 40 -7 8 Common 66 -= 0 Common 93 -";" is a punctuation character. Its properties are thus represented by the -binary number 10000 (10 in hexadecimal). -"b" is an alphabetic character and a lower case character. Its properties are -thus represented by the binary number 00011 (3 in hexadecimal). -"W" is an alphabetic character and an upper case character. Its properties are -thus represented by the binary number 00101 (5 in hexadecimal). -"7" is just a digit. Its properties are thus represented by the binary number -01000 (8 in hexadecimal). -"=" is not punctuation nor a digit nor an alphabetic character. Its properties -are thus represented by the binary number 00000 (0 in hexadecimal). 
-Japanese or Chinese alphabetic character properties are represented by the -binary number 00001 (1 in hexadecimal): they are alphabetic, but neither -upper nor lower case. - - -EXAMPLE (v3.02) -110 -NULL 0 NULL 0 -N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N -Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y -1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1 -9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9 -a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a -. . . - - -CAVEATS -Although the unicharset reader maintains the ability to read unicharsets -of older formats and will assign default values to missing fields, -the accuracy will be degraded. -Further, most other data files are indexed by the unicharset file, -so changing it without re-generating the others is likely to have dire -consequences. - - -HISTORY -The unicharset format first appeared with Tesseract 2.00, which was the -first version to support languages other than English. The unicharset file -contained only the first two fields, and the "ispunctuation" property was -absent (punctuation was regarded as "0", as "=" is in the above example. - - -SEE ALSO -tesseract(1), combine_tessdata(1), unicharset_extractor(1) -https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract - - -AUTHOR -The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present). - - + + + + + + + UNICHARSET(5) + + +unicharset +5 +  +  + + + unicharset + character properties file used by tesseract(1) + + +DESCRIPTION +Tesseract’s unicharset file contains information on each symbol +(unichar) the Tesseract OCR engine is trained to recognize. +A unicharset file (i.e. eng.unicharset) is distributed as part of a +Tesseract language pack (i.e. eng.traineddata). For information on +extracting the unicharset file, see combine_tessdata(1). +The first line of a unicharset file contains the number of unichars in +the file. 
After this line, each subsequent line provides information for +a single unichar. The first such line contains a placeholder reserved for +the space character. Each unichar is referred to within Tesseract by its +Unichar ID, which is the line number (minus 1) within the unicharset file. +Therefore, space gets unichar 0. +Each unichar line in the unicharset file (v2+) may have four space-separated fields: +'character' 'properties' 'script' 'id' +Starting with Tesseract v3.02, more information may be given for each unichar: +'character' 'properties' 'glyph_metrics' 'script' 'other_case' 'direction' 'mirror' 'normed_form' +Entries: + + + +character + + + +The UTF-8 encoded string to be produced for this unichar. + + + + + +properties + + + +An integer mask of character properties, one per bit. + From least to most significant bit, these are: isalpha, islower, isupper, + isdigit, ispunctuation. + + + + + +glyph_metrics + + + +Ten comma-separated integers representing various standards + for where this glyph is to be found within a baseline-normalized coordinate + system where 128 is normalized to x-height. + + + + +min_bottom, max_bottom: the ranges where the bottom of the character can + be found. + + + + +min_top, max_top: the ranges where the top of the character may be found. + + + + +min_width, max_width: horizontal width of the character. + + + + +min_bearing, max_bearing: how far from the usual start position does the + leftmost part of the character begin. + + + + +min_advance, max_advance: how far from the printer’s cell left do we + advance to begin the next character. + + + + + + + +script + + + +Name of the script (Latin, Common, Greek, Cyrillic, Han, null). + + + + + +other_case + + + +The Unichar ID of the other case version of this character + (upper or lower). + + + + + +direction + + + +The Unicode BiDi direction of this character, as defined by + ICU’s enum UCharDirection. 
(0 = Left to Right, 1 = Right to Left, + 2 = European Number…) + + + + + +mirror + + + +The Unichar ID of the BiDirectional mirror of this character. + For example the mirror of open paren is close paren, but Latin Capital C + has no mirror, so it remains a Latin Capital C. + + + + + +normed_form + + + +The UTF-8 representation of a "normalized form" of this unichar + for the purpose of blaming a module for errors given ground truth text. + For instance, a left or right single quote may normalize to an ASCII quote. + + + + + + +EXAMPLE (v2) +; 10 Common 46 +b 3 Latin 59 +W 5 Latin 40 +7 8 Common 66 += 0 Common 93 +";" is a punctuation character. Its properties are thus represented by the +binary number 10000 (10 in hexadecimal). +"b" is an alphabetic character and a lower case character. Its properties are +thus represented by the binary number 00011 (3 in hexadecimal). +"W" is an alphabetic character and an upper case character. Its properties are +thus represented by the binary number 00101 (5 in hexadecimal). +"7" is just a digit. Its properties are thus represented by the binary number +01000 (8 in hexadecimal). +"=" is not punctuation nor a digit nor an alphabetic character. Its properties +are thus represented by the binary number 00000 (0 in hexadecimal). +Japanese or Chinese alphabetic character properties are represented by the +binary number 00001 (1 in hexadecimal): they are alphabetic, but neither +upper nor lower case. + + +EXAMPLE (v3.02) +110 +NULL 0 NULL 0 +N 5 59,68,216,255,87,236,0,27,104,227 Latin 11 0 1 N +Y 5 59,68,216,255,91,205,0,47,91,223 Latin 33 0 2 Y +1 8 59,69,203,255,45,128,0,66,74,173 Common 3 2 3 1 +9 8 18,66,203,255,89,156,0,39,104,173 Common 4 2 4 9 +a 3 58,65,186,198,85,164,0,26,97,185 Latin 56 0 5 a +. . . + + +CAVEATS +Although the unicharset reader maintains the ability to read unicharsets +of older formats and will assign default values to missing fields, +the accuracy will be degraded. 
+Further, most other data files are indexed by the unicharset file, +so changing it without re-generating the others is likely to have dire +consequences. + + +HISTORY +The unicharset format first appeared with Tesseract 2.00, which was the +first version to support languages other than English. The unicharset file +contained only the first two fields, and the "ispunctuation" property was +absent (punctuation was regarded as "0", as "=" is in the above example. + + +SEE ALSO +tesseract(1), combine_tessdata(1), unicharset_extractor(1) +https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract + + +AUTHOR +The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present). + + diff --git a/doc/unicharset_extractor.1.asc b/doc/unicharset_extractor.1.asc index c972783a8e..bde21ab3ba 100644 --- a/doc/unicharset_extractor.1.asc +++ b/doc/unicharset_extractor.1.asc @@ -11,9 +11,9 @@ SYNOPSIS DESCRIPTION ----------- -Tesseract needs to know the set of possible characters it can output. -To generate the unicharset data file, use the unicharset_extractor -program on the same training pages bounding box files as used for +Tesseract needs to know the set of possible characters it can output. +To generate the unicharset data file, use the unicharset_extractor +program on the same training pages bounding box files as used for clustering: unicharset_extractor fontfile_1.box fontfile_2.box ... @@ -21,19 +21,19 @@ clustering: The unicharset will be put into the file 'dir/unicharset', or simply './unicharset' if no output directory is provided. -Tesseract also needs to have access to character properties isalpha, -isdigit, isupper, islower, ispunctuation. all of this auxilury data +Tesseract also needs to have access to character properties isalpha, +isdigit, isupper, islower, ispunctuation. all of this auxilury data and more is encoded in this file. 
(See unicharset(5)) -If your system supports the wctype functions, these values will be set -automatically by unicharset_extractor and there is no need to edit the -unicharset file. On some older systems (eg Windows 95), the unicharset +If your system supports the wctype functions, these values will be set +automatically by unicharset_extractor and there is no need to edit the +unicharset file. On some older systems (eg Windows 95), the unicharset file must be edited by hand to add these property description codes. -*NOTE* The unicharset file must be regenerated whenever inttemp, normproto -and pffmtable are generated (i.e. they must all be recreated when the box -file is changed) as they have to be in sync. This is made easier than in -previous versions by running unicharset_extractor before mftraining and +*NOTE* The unicharset file must be regenerated whenever inttemp, normproto +and pffmtable are generated (i.e. they must all be recreated when the box +file is changed) as they have to be in sync. This is made easier than in +previous versions by running unicharset_extractor before mftraining and cntraining, and giving the unicharset to mftraining. SEE ALSO diff --git a/doc/unicharset_extractor.1.html b/doc/unicharset_extractor.1.html index a6ac9e898b..6fdeb5e953 100644 --- a/doc/unicharset_extractor.1.html +++ b/doc/unicharset_extractor.1.html @@ -1,815 +1,815 @@ - - - - - -UNICHARSET_EXTRACTOR(1) - - - - - -
-
-

SYNOPSIS

-
-

unicharset_extractor [-D dir] FILE

-
-
-
-

DESCRIPTION

-
-

Tesseract needs to know the set of possible characters it can output. -To generate the unicharset data file, use the unicharset_extractor -program on the same training pages bounding box files as used for -clustering:

-
-
-
unicharset_extractor fontfile_1.box fontfile_2.box ...
-
-

The unicharset will be put into the file dir/unicharset, or simply -./unicharset if no output directory is provided.

-

Tesseract also needs to have access to character properties isalpha, -isdigit, isupper, islower, ispunctuation. all of this auxilury data -and more is encoded in this file. (See unicharset(5))

-

If your system supports the wctype functions, these values will be set -automatically by unicharset_extractor and there is no need to edit the -unicharset file. On some older systems (eg Windows 95), the unicharset -file must be edited by hand to add these property description codes.

-

NOTE The unicharset file must be regenerated whenever inttemp, normproto -and pffmtable are generated (i.e. they must all be recreated when the box -file is changed) as they have to be in sync. This is made easier than in -previous versions by running unicharset_extractor before mftraining and -cntraining, and giving the unicharset to mftraining.

-
-
- -
-

HISTORY

-
-

unicharset_extractor first appeared in Tesseract 2.00.

-
-
-
-

COPYING

-
-

Copyright (C) 2006, Google Inc. -Licensed under the Apache License, Version 2.0

-
-
-
-

AUTHOR

-
-

The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present).

-
-
-
-

- - - + + + + + +UNICHARSET_EXTRACTOR(1) + + + + + +
+
+

SYNOPSIS

+
+

unicharset_extractor [-D dir] FILE

+
+
+
+

DESCRIPTION

+
+

Tesseract needs to know the set of possible characters it can output. +To generate the unicharset data file, use the unicharset_extractor +program on the same training pages bounding box files as used for +clustering:

+
+
+
unicharset_extractor fontfile_1.box fontfile_2.box ...
+
+

The unicharset will be put into the file dir/unicharset, or simply +./unicharset if no output directory is provided.

+

Tesseract also needs to have access to character properties isalpha, +isdigit, isupper, islower, ispunctuation. All of this auxiliary data +and more is encoded in this file. (See unicharset(5))

+

If your system supports the wctype functions, these values will be set +automatically by unicharset_extractor and there is no need to edit the +unicharset file. On some older systems (e.g. Windows 95), the unicharset +file must be edited by hand to add these property description codes.

+

NOTE The unicharset file must be regenerated whenever inttemp, normproto +and pffmtable are generated (i.e. they must all be recreated when the box +file is changed) as they have to be in sync. This is made easier than in +previous versions by running unicharset_extractor before mftraining and +cntraining, and giving the unicharset to mftraining.

+
+
+ +
+

HISTORY

+
+

unicharset_extractor first appeared in Tesseract 2.00.

+
+
+
+

COPYING

+
+

Copyright (C) 2006, Google Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/doc/unicharset_extractor.1.xml b/doc/unicharset_extractor.1.xml index bea4d1e16e..45087a8c64 100644 --- a/doc/unicharset_extractor.1.xml +++ b/doc/unicharset_extractor.1.xml @@ -1,63 +1,63 @@ - - - - - - - UNICHARSET_EXTRACTOR(1) - - -unicharset_extractor -1 -  -  - - - unicharset_extractor - extract unicharset from Tesseract boxfiles - - -unicharset_extractor [-D dir] FILE - - -DESCRIPTION -Tesseract needs to know the set of possible characters it can output. -To generate the unicharset data file, use the unicharset_extractor -program on the same training pages bounding box files as used for -clustering: -unicharset_extractor fontfile_1.box fontfile_2.box ... -The unicharset will be put into the file dir/unicharset, or simply -./unicharset if no output directory is provided. -Tesseract also needs to have access to character properties isalpha, -isdigit, isupper, islower, ispunctuation. all of this auxilury data -and more is encoded in this file. (See unicharset(5)) -If your system supports the wctype functions, these values will be set -automatically by unicharset_extractor and there is no need to edit the -unicharset file. On some older systems (eg Windows 95), the unicharset -file must be edited by hand to add these property description codes. -NOTE The unicharset file must be regenerated whenever inttemp, normproto -and pffmtable are generated (i.e. they must all be recreated when the box -file is changed) as they have to be in sync. This is made easier than in -previous versions by running unicharset_extractor before mftraining and -cntraining, and giving the unicharset to mftraining. - - -SEE ALSO -tesseract(1), unicharset(5) -https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract - - -HISTORY -unicharset_extractor first appeared in Tesseract 2.00. - - -COPYING -Copyright (C) 2006, Google Inc. 
-Licensed under the Apache License, Version 2.0 - - -AUTHOR -The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present). - - + + + + + + + UNICHARSET_EXTRACTOR(1) + + +unicharset_extractor +1 +  +  + + + unicharset_extractor + extract unicharset from Tesseract boxfiles + + +unicharset_extractor [-D dir] FILE + + +DESCRIPTION +Tesseract needs to know the set of possible characters it can output. +To generate the unicharset data file, use the unicharset_extractor +program on the same training pages bounding box files as used for +clustering: +unicharset_extractor fontfile_1.box fontfile_2.box ... +The unicharset will be put into the file dir/unicharset, or simply +./unicharset if no output directory is provided. +Tesseract also needs to have access to character properties isalpha, +isdigit, isupper, islower, ispunctuation. all of this auxilury data +and more is encoded in this file. (See unicharset(5)) +If your system supports the wctype functions, these values will be set +automatically by unicharset_extractor and there is no need to edit the +unicharset file. On some older systems (eg Windows 95), the unicharset +file must be edited by hand to add these property description codes. +NOTE The unicharset file must be regenerated whenever inttemp, normproto +and pffmtable are generated (i.e. they must all be recreated when the box +file is changed) as they have to be in sync. This is made easier than in +previous versions by running unicharset_extractor before mftraining and +cntraining, and giving the unicharset to mftraining. + + +SEE ALSO +tesseract(1), unicharset(5) +https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract + + +HISTORY +unicharset_extractor first appeared in Tesseract 2.00. + + +COPYING +Copyright (C) 2006, Google Inc. 
+Licensed under the Apache License, Version 2.0 + + +AUTHOR +The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present). + + diff --git a/doc/wordlist2dawg.1.html b/doc/wordlist2dawg.1.html index 58e5cab4fa..733570511a 100644 --- a/doc/wordlist2dawg.1.html +++ b/doc/wordlist2dawg.1.html @@ -1,820 +1,820 @@ - - - - - -WORDLIST2DAWG(1) - - - - - -
-
-

SYNOPSIS

-
-

wordlist2dawg WORDLIST DAWG lang.unicharset

-

wordlist2dawg -t WORDLIST DAWG lang.unicharset

-

wordlist2dawg -r 1 WORDLIST DAWG lang.unicharset

-

wordlist2dawg -r 2 WORDLIST DAWG lang.unicharset

-

wordlist2dawg -l <short> <long> WORDLIST DAWG lang.unicharset

-
-
-
-

DESCRIPTION

-
-

wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph -(DAWG) for use with Tesseract. A DAWG is a compressed, space and time -efficient representation of a word list.

-
-
-
-

OPTIONS

-
-

-t - Verify that a given dawg file is equivalent to a given wordlist.

-

-r 1 - Reverse a word if it contains an RTL character.

-

-r 2 - Reverse all words.

-

-l <short> <long> - Produce a file with several dawgs in it, one each for words - of length <short>, <short+1>,… <long>

-
-
-
-

ARGUMENTS

-
-

WORDLIST - A plain text file in UTF-8, one word per line.

-

DAWG - The output DAWG to write.

-

lang.unicharset - The unicharset of the language. This is the unicharset - generated by mftraining(1).

-
-
-
-

SEE ALSO

-
-

tesseract(1), combine_tessdata(1), dawg2wordlist(1)

- -
-
-
-

COPYING

-
-

Copyright (C) 2006 Google, Inc. -Licensed under the Apache License, Version 2.0

-
-
-
-

AUTHOR

-
-

The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present).

-
-
-
-

- - - + + + + + +WORDLIST2DAWG(1) + + + + + +
+
+

SYNOPSIS

+
+

wordlist2dawg WORDLIST DAWG lang.unicharset

+

wordlist2dawg -t WORDLIST DAWG lang.unicharset

+

wordlist2dawg -r 1 WORDLIST DAWG lang.unicharset

+

wordlist2dawg -r 2 WORDLIST DAWG lang.unicharset

+

wordlist2dawg -l <short> <long> WORDLIST DAWG lang.unicharset

+
+
+
+

DESCRIPTION

+
+

wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph +(DAWG) for use with Tesseract. A DAWG is a compressed, space and time +efficient representation of a word list.

+
+
+
+

OPTIONS

+
+

-t + Verify that a given dawg file is equivalent to a given wordlist.

+

-r 1 + Reverse a word if it contains an RTL character.

+

-r 2 + Reverse all words.

+

-l <short> <long> + Produce a file with several dawgs in it, one each for words + of length <short>, <short+1>,… <long>

+
+
+
+

ARGUMENTS

+
+

WORDLIST + A plain text file in UTF-8, one word per line.

+

DAWG + The output DAWG to write.

+

lang.unicharset + The unicharset of the language. This is the unicharset + generated by mftraining(1).

+
+
+
+

SEE ALSO

+
+

tesseract(1), combine_tessdata(1), dawg2wordlist(1)

+ +
+
+
+

COPYING

+
+

Copyright (C) 2006 Google, Inc. +Licensed under the Apache License, Version 2.0

+
+
+
+

AUTHOR

+
+

The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present).

+
+
+
+

+ + + diff --git a/doc/wordlist2dawg.1.xml b/doc/wordlist2dawg.1.xml index 907d3a574d..bad256fe70 100644 --- a/doc/wordlist2dawg.1.xml +++ b/doc/wordlist2dawg.1.xml @@ -1,69 +1,69 @@ - - - - - - - WORDLIST2DAWG(1) - - -wordlist2dawg -1 -  -  - - - wordlist2dawg - convert a wordlist to a DAWG for Tesseract - - -wordlist2dawg WORDLIST DAWG lang.unicharset -wordlist2dawg -t WORDLIST DAWG lang.unicharset -wordlist2dawg -r 1 WORDLIST DAWG lang.unicharset -wordlist2dawg -r 2 WORDLIST DAWG lang.unicharset -wordlist2dawg -l <short> <long> WORDLIST DAWG lang.unicharset - - -DESCRIPTION -wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph -(DAWG) for use with Tesseract. A DAWG is a compressed, space and time -efficient representation of a word list. - - -OPTIONS --t - Verify that a given dawg file is equivalent to a given wordlist. --r 1 - Reverse a word if it contains an RTL character. --r 2 - Reverse all words. --l <short> <long> - Produce a file with several dawgs in it, one each for words - of length <short>, <short+1>,… <long> - - -ARGUMENTS -WORDLIST - A plain text file in UTF-8, one word per line. -DAWG - The output DAWG to write. -lang.unicharset - The unicharset of the language. This is the unicharset - generated by mftraining(1). - - -SEE ALSO -tesseract(1), combine_tessdata(1), dawg2wordlist(1) -https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract - - -COPYING -Copyright (C) 2006 Google, Inc. -Licensed under the Apache License, Version 2.0 - - -AUTHOR -The Tesseract OCR engine was written by Ray Smith and his research groups -at Hewlett Packard (1985-1995) and Google (2006-present). 
- - + + + + + + + WORDLIST2DAWG(1) + + +wordlist2dawg +1 +  +  + + + wordlist2dawg + convert a wordlist to a DAWG for Tesseract + + +wordlist2dawg WORDLIST DAWG lang.unicharset +wordlist2dawg -t WORDLIST DAWG lang.unicharset +wordlist2dawg -r 1 WORDLIST DAWG lang.unicharset +wordlist2dawg -r 2 WORDLIST DAWG lang.unicharset +wordlist2dawg -l <short> <long> WORDLIST DAWG lang.unicharset + + +DESCRIPTION +wordlist2dawg(1) converts a wordlist to a Directed Acyclic Word Graph +(DAWG) for use with Tesseract. A DAWG is a compressed, space and time +efficient representation of a word list. + + +OPTIONS +-t + Verify that a given dawg file is equivalent to a given wordlist. +-r 1 + Reverse a word if it contains an RTL character. +-r 2 + Reverse all words. +-l <short> <long> + Produce a file with several dawgs in it, one each for words + of length <short>, <short+1>,… <long> + + +ARGUMENTS +WORDLIST + A plain text file in UTF-8, one word per line. +DAWG + The output DAWG to write. +lang.unicharset + The unicharset of the language. This is the unicharset + generated by mftraining(1). + + +SEE ALSO +tesseract(1), combine_tessdata(1), dawg2wordlist(1) +https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract + + +COPYING +Copyright (C) 2006 Google, Inc. +Licensed under the Apache License, Version 2.0 + + +AUTHOR +The Tesseract OCR engine was written by Ray Smith and his research groups +at Hewlett Packard (1985-1995) and Google (2006-present). 
+ + From f60bfbe55c056e14edfd65929e57e9d2bea63249 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 7 Dec 2016 19:52:18 +0100 Subject: [PATCH 082/132] fix typo --- ccstruct/imagedata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccstruct/imagedata.cpp b/ccstruct/imagedata.cpp index eb1eab7f07..0c3f7cab78 100644 --- a/ccstruct/imagedata.cpp +++ b/ccstruct/imagedata.cpp @@ -32,7 +32,7 @@ #if defined(__MINGW32__) # include -##elif __cplusplus <= 199711L // in C++11 +#elif __cplusplus > 199711L // in C++11 # include #endif From f29abea1609d66f86a9a73bfe0e0774b4bcb79e7 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 4 Dec 2016 20:04:10 +0100 Subject: [PATCH 083/132] tesseract: Disable Leptonica messages Disable debugging and informational messages from Leptonica for release builds. Signed-off-by: Stefan Weil --- api/tesseractmain.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/api/tesseractmain.cpp b/api/tesseractmain.cpp index 8f85ff8c98..7cd4021fbe 100644 --- a/api/tesseractmain.cpp +++ b/api/tesseractmain.cpp @@ -388,6 +388,11 @@ int main(int argc, char** argv) { static GenericVector vars_vec; static GenericVector vars_values; +#if !defined(DEBUG) + // Disable debugging and informational messages from Leptonica. + setMsgSeverity(L_SEVERITY_WARNING); +#endif + #if defined(HAVE_TIFFIO_H) && defined(_WIN32) /* Show libtiff warnings on console (not in GUI). */ TIFFSetWarningHandler(Win32WarningHandler); From d969ed13522e01a03c633d3bf585fac9ebd5f5ca Mon Sep 17 00:00:00 2001 From: Jeff Breidenbach Date: Wed, 7 Dec 2016 22:03:28 +0100 Subject: [PATCH 084/132] Produce warning for invalid resolution. 
Fix #453 --- api/baseapi.cpp | 2 ++ ccmain/osdetect.cpp | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index bd4668afc9..a5540aeeaa 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -2220,6 +2220,8 @@ void TessBaseAPI::Threshold(Pix** pix) { if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) { // Use the minimum default resolution, as it is safer to under-estimate // than over-estimate resolution. + tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", + y_res, kMinCredibleResolution); thresholder_->SetSourceYResolution(kMinCredibleResolution); } PageSegMode pageseg_mode = diff --git a/ccmain/osdetect.cpp b/ccmain/osdetect.cpp index f2fe94a1c3..69e626222f 100644 --- a/ccmain/osdetect.cpp +++ b/ccmain/osdetect.cpp @@ -164,8 +164,14 @@ void remove_nontext_regions(tesseract::Tesseract *tess, BLOCK_LIST *blocks, int vertical_y = 1; tesseract::TabVector_LIST v_lines; tesseract::TabVector_LIST h_lines; - int resolution = (kMinCredibleResolution > pixGetXRes(pix)) ? - kMinCredibleResolution : pixGetXRes(pix); + int resolution; + if (kMinCredibleResolution > pixGetXRes(pix)) { + resolution = kMinCredibleResolution; + tprintf("Warning. Invalid resolution %d dpi. Using %d instead.\n", + pixGetXRes(pix), resolution); + } else { + resolution = pixGetXRes(pix); + } tesseract::LineFinder::FindAndRemoveLines(resolution, false, pix, &vertical_x, &vertical_y, From 59ba80bb3a5b95d17e1b5c64358343b49f15046a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?=
Date: Thu, 8 Dec 2016 15:17:58 +0100 Subject: [PATCH 085/132] More clang-tidy from previous commits # Conflicts: # opencl/opencl_device_selection.h # opencl/openclwrapper.cpp --- api/baseapi.h | 4 +- ccmain/thresholder.cpp | 8 +- ccutil/strngs.h | 4 +- opencl/opencl_device_selection.h | 78 ++- opencl/openclwrapper.cpp | 842 ++++++++++++++----------------- opencl/openclwrapper.h | 11 +- textord/blkocc.h | 5 +- 7 files changed, 412 insertions(+), 540 deletions(-) diff --git a/api/baseapi.h b/api/baseapi.h index 9c98112ec6..65afa36269 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -847,9 +847,7 @@ class TESS_API TessBaseAPI { int** y1, PAGE_RES* page_res); - TESS_LOCAL const PAGE_RES* GetPageRes() const { - return page_res_; - } + TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; } /* @} */ diff --git a/ccmain/thresholder.cpp b/ccmain/thresholder.cpp index fc8111ef7b..77069bc9d9 100644 --- a/ccmain/thresholder.cpp +++ b/ccmain/thresholder.cpp @@ -268,10 +268,10 @@ void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix, OpenclDevice od; if ((num_channels == 4 || num_channels == 1) && od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0 ) { - od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), - num_channels, pixGetWpl(src_pix) * 4, - thresholds, hi_values, out_pix /*pix_OCL*/, - rect_height_, rect_width_, rect_top_, rect_left_); + od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels, + pixGetWpl(src_pix) * 4, thresholds, hi_values, + out_pix /*pix_OCL*/, rect_height_, rect_width_, + rect_top_, rect_left_); } else { #endif ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix); diff --git a/ccutil/strngs.h b/ccutil/strngs.h index ea1738895c..2e65463efd 100644 --- a/ccutil/strngs.h +++ b/ccutil/strngs.h @@ -147,9 +147,7 @@ class TESS_API STRING } // returns the string data part of storage - inline char* GetCStr() { - return ((char *)data_) + sizeof(STRING_HEADER); - } + inline char* 
GetCStr() { return ((char*)data_) + sizeof(STRING_HEADER); } inline const char* GetCStr() const { return ((const char *)data_) + sizeof(STRING_HEADER); diff --git a/opencl/opencl_device_selection.h b/opencl/opencl_device_selection.h index 5161125683..b595be62d4 100644 --- a/opencl/opencl_device_selection.h +++ b/opencl/opencl_device_selection.h @@ -68,8 +68,8 @@ typedef struct { typedef ds_status (*ds_score_release)(void* score); static ds_status releaseDSProfile(ds_profile* profile, ds_score_release sr) { ds_status status = DS_SUCCESS; - if (profile!=NULL) { - if (profile->devices!=NULL && sr!=NULL) { + if (profile != NULL) { + if (profile->devices != NULL && sr != NULL) { unsigned int i; for (i = 0; i < profile->numDevices; i++) { free(profile->devices[i].oclDeviceName); @@ -90,19 +90,15 @@ static ds_status initDSProfile(ds_profile** p, const char* version) { int numDevices; cl_uint numPlatforms; cl_platform_id* platforms = NULL; - cl_device_id* devices = NULL; + cl_device_id* devices = NULL; ds_status status = DS_SUCCESS; - ds_profile* profile = NULL; unsigned int next; unsigned int i; - if (p == NULL) - return DS_INVALID_PROFILE; + if (p == NULL) return DS_INVALID_PROFILE; + ds_profile* profile = (ds_profile*)malloc(sizeof(ds_profile)); + if (profile == NULL) return DS_MEMORY_ERROR; - profile = (ds_profile*)malloc(sizeof(ds_profile)); - if (profile == NULL) - return DS_MEMORY_ERROR; - memset(profile, 0, sizeof(ds_profile)); clGetPlatformIDs(0, NULL, &numPlatforms); @@ -132,7 +128,8 @@ static ds_status initDSProfile(ds_profile** p, const char* version) { } profile->numDevices = numDevices+1; // +1 to numDevices to include the native CPU - profile->devices = (ds_device*)malloc(profile->numDevices*sizeof(ds_device)); + profile->devices = + (ds_device*)malloc(profile->numDevices * sizeof(ds_device)); if (profile->devices == NULL) { profile->numDevices = 0; status = DS_MEMORY_ERROR; @@ -152,14 +149,14 @@ static ds_status initDSProfile(ds_profile** p, const char* 
version) { profile->devices[next].type = DS_DEVICE_OPENCL_DEVICE; profile->devices[next].oclDeviceID = devices[j]; - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME - , DS_DEVICE_NAME_LENGTH, &buffer, NULL); + clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DEVICE_NAME, + DS_DEVICE_NAME_LENGTH, &buffer, NULL); length = strlen(buffer); profile->devices[next].oclDeviceName = (char*)malloc(length+1); memcpy(profile->devices[next].oclDeviceName, buffer, length+1); - clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION - , DS_DEVICE_NAME_LENGTH, &buffer, NULL); + clGetDeviceInfo(profile->devices[next].oclDeviceID, CL_DRIVER_VERSION, + DS_DEVICE_NAME_LENGTH, &buffer, NULL); length = strlen(buffer); profile->devices[next].oclDriverVersion = (char*)malloc(length+1); memcpy(profile->devices[next].oclDriverVersion, buffer, length+1); @@ -214,8 +211,7 @@ static ds_status profileDevices(ds_profile* profile, switch (type) { case DS_EVALUATE_NEW_ONLY: - if (profile->devices[i].score != NULL) - break; + if (profile->devices[i].score != NULL) break; // else fall through case DS_EVALUATE_ALL: evaluatorStatus = evaluator(profile->devices+i, evaluatorData); @@ -260,14 +256,11 @@ static ds_status writeProfileToFile(ds_profile* profile, ds_score_serializer serializer, const char* file) { ds_status status = DS_SUCCESS; - FILE* profileFile = NULL; + if (profile == NULL) return DS_INVALID_PROFILE; - if (profile == NULL) - return DS_INVALID_PROFILE; - - profileFile = fopen(file, "wb"); - if (profileFile==NULL) { + FILE* profileFile = fopen(file, "wb"); + if (profileFile == NULL) { status = DS_FILE_ERROR; } else { @@ -330,7 +323,8 @@ static ds_status writeProfileToFile(ds_profile* profile, fwrite(DS_TAG_SCORE, sizeof(char), strlen(DS_TAG_SCORE), profileFile); status = serializer(profile->devices+i, &serializedScore, &serializedScoreSize); - if (status == DS_SUCCESS && serializedScore!=NULL && serializedScoreSize > 0) { + if (status == DS_SUCCESS && 
serializedScore != NULL && + serializedScoreSize > 0) { fwrite(serializedScore, sizeof(char), serializedScoreSize, profileFile); free(serializedScore); } @@ -346,23 +340,21 @@ static ds_status writeProfileToFile(ds_profile* profile, static ds_status readProFile(const char* fileName, char** content, size_t* contentSize) { - FILE * input = NULL; size_t size = 0; - char* binary = NULL; *contentSize = 0; *content = NULL; - input = fopen(fileName, "rb"); - if(input == NULL) { + FILE* input = fopen(fileName, "rb"); + if (input == NULL) { return DS_FILE_ERROR; } fseek(input, 0L, SEEK_END); size = ftell(input); rewind(input); - binary = (char*)malloc(size); - if(binary == NULL) { + char* binary = (char*)malloc(size); + if (binary == NULL) { fclose(input); return DS_FILE_ERROR; } @@ -409,8 +401,7 @@ static ds_status readProfileFromFile(ds_profile* profile, const char* contentEnd = NULL; size_t contentSize; - if (profile==NULL) - return DS_INVALID_PROFILE; + if (profile == NULL) return DS_INVALID_PROFILE; status = readProFile(file, &contentStart, &contentSize); if (status == DS_SUCCESS) { @@ -432,7 +423,7 @@ static ds_status readProfileFromFile(ds_profile* profile, dataStart += strlen(DS_TAG_VERSION); dataEnd = findString(dataStart, contentEnd, DS_TAG_VERSION_END); - if (dataEnd==NULL) { + if (dataEnd == NULL) { status = DS_PROFILE_FILE_ERROR; goto cleanup; } @@ -464,27 +455,27 @@ static ds_status readProfileFromFile(ds_profile* profile, const char* deviceDriverEnd; dataStart = findString(currentPosition, contentEnd, DS_TAG_DEVICE); - if (dataStart==NULL) { + if (dataStart == NULL) { // nothing useful remain, quit... 
break; } dataStart+=strlen(DS_TAG_DEVICE); dataEnd = findString(dataStart, contentEnd, DS_TAG_DEVICE_END); - if (dataEnd==NULL) { + if (dataEnd == NULL) { status = DS_PROFILE_FILE_ERROR; goto cleanup; } // parse the device type deviceTypeStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_TYPE); - if (deviceTypeStart==NULL) { + if (deviceTypeStart == NULL) { status = DS_PROFILE_FILE_ERROR; goto cleanup; } deviceTypeStart+=strlen(DS_TAG_DEVICE_TYPE); deviceTypeEnd = findString(deviceTypeStart, contentEnd, DS_TAG_DEVICE_TYPE_END); - if (deviceTypeEnd==NULL) { + if (deviceTypeEnd == NULL) { status = DS_PROFILE_FILE_ERROR; goto cleanup; } @@ -495,14 +486,14 @@ static ds_status readProfileFromFile(ds_profile* profile, if (deviceType == DS_DEVICE_OPENCL_DEVICE) { deviceNameStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_NAME); - if (deviceNameStart==NULL) { + if (deviceNameStart == NULL) { status = DS_PROFILE_FILE_ERROR; goto cleanup; } deviceNameStart+=strlen(DS_TAG_DEVICE_NAME); deviceNameEnd = findString(deviceNameStart, contentEnd, DS_TAG_DEVICE_NAME_END); - if (deviceNameEnd==NULL) { + if (deviceNameEnd == NULL) { status = DS_PROFILE_FILE_ERROR; goto cleanup; } @@ -510,19 +501,18 @@ static ds_status readProfileFromFile(ds_profile* profile, deviceDriverStart = findString(dataStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION); - if (deviceDriverStart==NULL) { + if (deviceDriverStart == NULL) { status = DS_PROFILE_FILE_ERROR; goto cleanup; } deviceDriverStart+=strlen(DS_TAG_DEVICE_DRIVER_VERSION); deviceDriverEnd = findString(deviceDriverStart, contentEnd, DS_TAG_DEVICE_DRIVER_VERSION_END); - if (deviceDriverEnd ==NULL) { + if (deviceDriverEnd == NULL) { status = DS_PROFILE_FILE_ERROR; goto cleanup; } - // check if this device is on the system for (i = 0; i < profile->numDevices; i++) { if (profile->devices[i].type == DS_DEVICE_OPENCL_DEVICE) { @@ -538,7 +528,7 @@ static ds_status readProfileFromFile(ds_profile* profile, && 
strncmp(profile->devices[i].oclDriverVersion, deviceDriverStart, driverVersionLength)==0) { deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE); - if (deviceNameStart==NULL) { + if (deviceNameStart == NULL) { status = DS_PROFILE_FILE_ERROR; goto cleanup; } @@ -560,7 +550,7 @@ static ds_status readProfileFromFile(ds_profile* profile, for (i = 0; i < profile->numDevices; i++) { if (profile->devices[i].type == DS_DEVICE_NATIVE_CPU) { deviceScoreStart = findString(dataStart, contentEnd, DS_TAG_SCORE); - if (deviceScoreStart==NULL) { + if (deviceScoreStart == NULL) { status = DS_PROFILE_FILE_ERROR; goto cleanup; } diff --git a/opencl/openclwrapper.cpp b/opencl/openclwrapper.cpp index 1361299b77..c3bb745127 100644 --- a/opencl/openclwrapper.cpp +++ b/opencl/openclwrapper.cpp @@ -112,10 +112,11 @@ void legalizeFileName( char *fileName) { // initial ./ is valid for present directory //if (*pos == '.') pos++; //if (*pos == '/') pos++; - for ( char *pos = strstr(fileName, invalidStr); pos != NULL; pos = strstr(pos+1, invalidStr)) { - //printf("\tfound: %s, ", pos); - pos[0] = '_'; - //printf("fileName: %s\n", fileName); + for (char *pos = strstr(fileName, invalidStr); pos != NULL; + pos = strstr(pos + 1, invalidStr)) { + // printf("\tfound: %s, ", pos); + pos[0] = '_'; + // printf("fileName: %s\n", fileName); } } } @@ -128,17 +129,22 @@ void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) { gpuInfo->mpDevID = device; gpuInfo->mpArryDevsID = new cl_device_id[1]; gpuInfo->mpArryDevsID[0] = gpuInfo->mpDevID; - clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE , sizeof(cl_device_type), &gpuInfo->mDevType , &size); + clStatus = + clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_TYPE, + sizeof(cl_device_type), &gpuInfo->mDevType, &size); CHECK_OPENCL( clStatus, "populateGPUEnv::getDeviceInfo(TYPE)"); // platform - clStatus = clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM , sizeof(cl_platform_id), &gpuInfo->mpPlatformID , &size); + 
clStatus = + clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PLATFORM, + sizeof(cl_platform_id), &gpuInfo->mpPlatformID, &size); CHECK_OPENCL( clStatus, "populateGPUEnv::getDeviceInfo(PLATFORM)"); // context cl_context_properties props[3]; props[0] = CL_CONTEXT_PLATFORM; props[1] = (cl_context_properties) gpuInfo->mpPlatformID; props[2] = 0; - gpuInfo->mpContext = clCreateContext(props, 1, &gpuInfo->mpDevID, NULL, NULL, &clStatus); + gpuInfo->mpContext = clCreateContext(props, 1, &gpuInfo->mpDevID, NULL, + NULL, &clStatus); CHECK_OPENCL( clStatus, "populateGPUEnv::createContext"); // queue cl_command_queue_properties queueProperties = 0; @@ -149,16 +155,15 @@ void populateGPUEnvFromDevice( GPUEnv *gpuInfo, cl_device_id device ) { int OpenclDevice::LoadOpencl() { #ifdef WIN32 - HINSTANCE HOpenclDll = NULL; - void * OpenclDll = NULL; - //fprintf(stderr, " LoadOpenclDllxx... \n"); - OpenclDll = static_cast( HOpenclDll ); - OpenclDll = LoadLibrary( "openCL.dll" ); - if ( !static_cast( OpenclDll ) ) - { - fprintf(stderr, "[OD] Load opencl.dll failed!\n"); - FreeLibrary( static_cast( OpenclDll ) ); - return 0; + HINSTANCE HOpenclDll = NULL; + void *OpenclDll = NULL; + // fprintf(stderr, " LoadOpenclDllxx... 
\n"); + OpenclDll = static_cast(HOpenclDll); + OpenclDll = LoadLibrary("openCL.dll"); + if (!static_cast(OpenclDll)) { + fprintf(stderr, "[OD] Load opencl.dll failed!\n"); + FreeLibrary(static_cast(OpenclDll)); + return 0; } fprintf(stderr, "[OD] Load opencl.dll successful!\n"); #endif @@ -205,7 +210,8 @@ PIX *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX *pixd, PIX *pixs, pixSetData(pixd, pValues); } - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, clbuffer, pValues, 0, NULL, NULL); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, clbuffer, pValues, 0, NULL, + NULL); if (sync) { clFinish(rEnv.mpkCmdQueue); @@ -216,20 +222,21 @@ PIX *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX *pixd, PIX *pixs, cl_mem allocateIntBuffer( KernelEnv rEnv, const l_uint32 *_pValues, size_t nElements, cl_int *pStatus , bool sync = false) { - cl_mem xValues = clCreateBuffer( rEnv.mpkContext, (cl_mem_flags) (CL_MEM_READ_WRITE), - nElements * sizeof(l_int32), NULL, pStatus); + cl_mem xValues = + clCreateBuffer(rEnv.mpkContext, (cl_mem_flags)(CL_MEM_READ_WRITE), + nElements * sizeof(l_int32), NULL, pStatus); - if (_pValues != NULL) - { - l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0, - nElements * sizeof(l_int32), 0, NULL, NULL, NULL ); + if (_pValues != NULL) { + l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer( + rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0, + nElements * sizeof(l_int32), 0, NULL, NULL, NULL); - memcpy(pValues, _pValues, nElements * sizeof(l_int32)); + memcpy(pValues, _pValues, nElements * sizeof(l_int32)); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, xValues, pValues, 0, NULL, + NULL); - if (sync) - clFinish( rEnv.mpkCmdQueue ); + if (sync) clFinish(rEnv.mpkCmdQueue); } return xValues; @@ -238,27 +245,25 @@ PIX *mapOutputCLBuffer(KernelEnv rEnv, cl_mem clbuffer, PIX *pixd, PIX *pixs, void OpenclDevice::releaseMorphCLBuffers() { - 
if (pixdCLIntermediate != NULL) - clReleaseMemObject(pixdCLIntermediate); - if (pixsCLBuffer != NULL) - clReleaseMemObject(pixsCLBuffer); - if (pixdCLBuffer != NULL) - clReleaseMemObject(pixdCLBuffer); - if (pixThBuffer != NULL) - clReleaseMemObject(pixThBuffer); - pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = NULL; + if (pixdCLIntermediate != NULL) clReleaseMemObject(pixdCLIntermediate); + if (pixsCLBuffer != NULL) clReleaseMemObject(pixsCLBuffer); + if (pixdCLBuffer != NULL) clReleaseMemObject(pixdCLBuffer); + if (pixThBuffer != NULL) clReleaseMemObject(pixThBuffer); + pixdCLIntermediate = pixsCLBuffer = pixdCLBuffer = pixThBuffer = NULL; } int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs) { SetKernelEnv( &rEnv ); - if (pixThBuffer != NULL) - { - pixsCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus); + if (pixThBuffer != NULL) { + pixsCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl * h, + CL_MEM_ALLOC_HOST_PTR, &clStatus); - //Get the output from ThresholdToPix operation - clStatus = clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0, sizeof(l_uint32) * wpl*h, 0, NULL, NULL); + // Get the output from ThresholdToPix operation + clStatus = + clEnqueueCopyBuffer(rEnv.mpkCmdQueue, pixThBuffer, pixsCLBuffer, 0, 0, + sizeof(l_uint32) * wpl * h, 0, NULL, NULL); } else { @@ -269,9 +274,11 @@ int OpenclDevice::initMorphCLAllocations(l_int32 wpl, l_int32 h, PIX* pixs) pixsCLBuffer = allocateZeroCopyBuffer(rEnv, srcdata, wpl*h, CL_MEM_USE_HOST_PTR, &clStatus); } - pixdCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus); + pixdCLBuffer = allocateZeroCopyBuffer(rEnv, NULL, wpl * h, + CL_MEM_ALLOC_HOST_PTR, &clStatus); - pixdCLIntermediate = allocateZeroCopyBuffer(rEnv, NULL, wpl*h, CL_MEM_ALLOC_HOST_PTR, &clStatus); + pixdCLIntermediate = allocateZeroCopyBuffer( + rEnv, NULL, wpl * h, CL_MEM_ALLOC_HOST_PTR, &clStatus); return (int)clStatus; 
} @@ -414,9 +421,8 @@ int OpenclDevice::BinaryGenerated( const char * clFileName, FILE ** fhandle ) legalizeFileName(fileName); fd = fopen(fileName, "rb"); status = (fd != NULL) ? 1 : 0; - if ( fd != NULL ) - { - *fhandle = fd; + if (fd != NULL) { + *fhandle = fd; } return status; @@ -428,9 +434,8 @@ int OpenclDevice::CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * cl { if ( strcasecmp( gpuEnvCached->mArryKnelSrcFile[i], clFileName ) == 0 ) { - if ( gpuEnvCached->mpArryPrograms[i] != NULL ) - { - return 1; + if (gpuEnvCached->mpArryPrograms[i] != NULL) { + return 1; } } } @@ -439,11 +444,10 @@ int OpenclDevice::CachedOfKernerPrg( const GPUEnv *gpuEnvCached, const char * cl } int OpenclDevice::WriteBinaryToFile( const char* fileName, const char* birary, size_t numBytes ) { - FILE *output = NULL; - output = fopen( fileName, "wb" ); - if ( output == NULL ) - { - return 0; + FILE *output = NULL; + output = fopen(fileName, "wb"); + if (output == NULL) { + return 0; } fwrite( birary, sizeof(char), numBytes, output ); @@ -461,32 +465,32 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c cl_device_id *mpArryDevsID; char **binaries, *str = NULL; - clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES, - sizeof(numDevices), &numDevices, NULL ); + clStatus = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, + sizeof(numDevices), &numDevices, NULL); CHECK_OPENCL( clStatus, "clGetProgramInfo" ); mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) * numDevices ); - if ( mpArryDevsID == NULL ) - { - return 0; + if (mpArryDevsID == NULL) { + return 0; } /* grab the handles to all of the devices in the program. 
*/ - clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES, - sizeof(cl_device_id) * numDevices, mpArryDevsID, NULL ); + clStatus = clGetProgramInfo(program, CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * numDevices, mpArryDevsID, + NULL); CHECK_OPENCL( clStatus, "clGetProgramInfo" ); /* figure out the sizes of each of the binaries. */ binarySizes = (size_t*) malloc( sizeof(size_t) * numDevices ); - clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, - sizeof(size_t) * numDevices, binarySizes, NULL ); + clStatus = + clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, + sizeof(size_t) * numDevices, binarySizes, NULL); CHECK_OPENCL( clStatus, "clGetProgramInfo" ); /* copy over all of the generated binaries. */ binaries = (char**) malloc( sizeof(char *) * numDevices ); - if ( binaries == NULL ) - { - return 0; + if (binaries == NULL) { + return 0; } for ( i = 0; i < numDevices; i++ ) @@ -494,19 +498,18 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c if ( binarySizes[i] != 0 ) { binaries[i] = (char*) malloc( sizeof(char) * binarySizes[i] ); - if ( binaries[i] == NULL ) - { - return 0; + if (binaries[i] == NULL) { + return 0; } } else { - binaries[i] = NULL; + binaries[i] = NULL; } } - clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES, - sizeof(char *) * numDevices, binaries, NULL ); + clStatus = clGetProgramInfo(program, CL_PROGRAM_BINARIES, + sizeof(char *) * numDevices, binaries, NULL); CHECK_OPENCL(clStatus,"clGetProgramInfo"); /* dump out each binary into its own separate file. 
*/ @@ -518,7 +521,7 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c { char deviceName[1024]; clStatus = clGetDeviceInfo(mpArryDevsID[i], CL_DEVICE_NAME, - sizeof(deviceName), deviceName, NULL); + sizeof(deviceName), deviceName, NULL); CHECK_OPENCL( clStatus, "clGetDeviceInfo" ); str = (char*) strstr( clFileName, (char*) ".cl" ); @@ -556,16 +559,16 @@ int OpenclDevice::GeneratBinFromKernelSource( cl_program program, const char * c void copyIntBuffer( KernelEnv rEnv, cl_mem xValues, const l_uint32 *_pValues, size_t nElements, cl_int *pStatus ) { - l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer( rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0, - nElements * sizeof(l_int32), 0, NULL, NULL, NULL ); - clFinish( rEnv.mpkCmdQueue ); - if (_pValues != NULL) - { - for ( int i = 0; i < (int)nElements; i++ ) - pValues[i] = (l_int32)_pValues[i]; + l_int32 *pValues = (l_int32 *)clEnqueueMapBuffer( + rEnv.mpkCmdQueue, xValues, CL_TRUE, CL_MAP_WRITE, 0, + nElements * sizeof(l_int32), 0, NULL, NULL, NULL); + clFinish(rEnv.mpkCmdQueue); + if (_pValues != NULL) { + for (int i = 0; i < (int)nElements; i++) pValues[i] = (l_int32)_pValues[i]; } - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue,xValues,pValues,0,NULL,NULL); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, xValues, pValues, 0, NULL, + NULL); //clFinish( rEnv.mpkCmdQueue ); return; } @@ -599,14 +602,13 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption ) //PERF_COUNT_SUB("BinaryGenerated") if ( binaryExisted == 1 ) { - clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES, - sizeof(numDevices), &numDevices, NULL ); - CHECK_OPENCL( clStatus, "clGetContextInfo" ); + clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_NUM_DEVICES, + sizeof(numDevices), &numDevices, NULL); + CHECK_OPENCL(clStatus, "clGetContextInfo"); - mpArryDevsID = (cl_device_id*) malloc( sizeof(cl_device_id) * numDevices ); - if ( mpArryDevsID == NULL ) - { - return 0; + 
mpArryDevsID = (cl_device_id *)malloc(sizeof(cl_device_id) * numDevices); + if (mpArryDevsID == NULL) { + return 0; } //PERF_COUNT_SUB("get numDevices") b_error = 0; @@ -633,8 +635,9 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption ) //PERF_COUNT_SUB("read file") fd = NULL; // grab the handles to all of the devices in the context. - clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES, - sizeof( cl_device_id ) * numDevices, mpArryDevsID, NULL ); + clStatus = clGetContextInfo(gpuInfo->mpContext, CL_CONTEXT_DEVICES, + sizeof(cl_device_id) * numDevices, + mpArryDevsID, NULL); CHECK_OPENCL( clStatus, "clGetContextInfo" ); //PERF_COUNT_SUB("get devices") //fprintf(stderr, "[OD] Create kernel from binary\n"); @@ -646,7 +649,7 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption ) free( binary ); free( mpArryDevsID ); mpArryDevsID = NULL; -//PERF_COUNT_SUB("binaryExisted") + // PERF_COUNT_SUB("binaryExisted") } else { @@ -658,9 +661,8 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption ) //PERF_COUNT_SUB("!binaryExisted") } - if ( gpuInfo->mpArryPrograms[idx] == (cl_program) NULL ) - { - return 0; + if (gpuInfo->mpArryPrograms[idx] == (cl_program) NULL) { + return 0; } //char options[512]; @@ -669,15 +671,17 @@ int OpenclDevice::CompileKernelFile( GPUEnv *gpuInfo, const char *buildOption ) PERF_COUNT_START("OD::CompileKernel::clBuildProgram") if (!gpuInfo->mnIsUserCreated) { - clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID, - buildOption, NULL, NULL); -//PERF_COUNT_SUB("clBuildProgram notUserCreated") + clStatus = + clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID, + buildOption, NULL, NULL); + // PERF_COUNT_SUB("clBuildProgram notUserCreated") } else { - clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID), - buildOption, NULL, NULL); -//PERF_COUNT_SUB("clBuildProgram isUserCreated") + 
clStatus = + clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID), + buildOption, NULL, NULL); + // PERF_COUNT_SUB("clBuildProgram isUserCreated") } PERF_COUNT_END if ( clStatus != CL_SUCCESS ) @@ -685,13 +689,15 @@ PERF_COUNT_END printf ("BuildProgram error!\n"); if ( !gpuInfo->mnIsUserCreated ) { - clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], - CL_PROGRAM_BUILD_LOG, 0, NULL, &length ); + clStatus = clGetProgramBuildInfo( + gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0], + CL_PROGRAM_BUILD_LOG, 0, NULL, &length); } else { - clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, - CL_PROGRAM_BUILD_LOG, 0, NULL, &length); + clStatus = clGetProgramBuildInfo( + gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID, + CL_PROGRAM_BUILD_LOG, 0, NULL, &length); } if ( clStatus != CL_SUCCESS ) { @@ -699,9 +705,8 @@ PERF_COUNT_END return 0; } buildLog = (char*) malloc( length ); - if ( buildLog == (char*) NULL ) - { - return 0; + if (buildLog == (char *)NULL) { + return 0; } if ( !gpuInfo->mnIsUserCreated ) { @@ -720,10 +725,9 @@ PERF_COUNT_END } fd1 = fopen( "kernel-build.log", "w+" ); - if ( fd1 != NULL ) - { - fwrite( buildLog, sizeof(char), length, fd1 ); - fclose( fd1 ); + if (fd1 != NULL) { + fwrite(buildLog, sizeof(char), length, fd1); + fclose(fd1); } free( buildLog ); @@ -766,36 +770,40 @@ PERF_COUNT_START("pixReadFromTiffKernel") l_uint32 *pResult = (l_uint32 *)malloc(w*h * sizeof(l_uint32)); rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "composeRGBPixel", &clStatus ); - CHECK_OPENCL( clStatus, "clCreateKernel composeRGBPixel"); + CHECK_OPENCL(clStatus, "clCreateKernel composeRGBPixel"); //Allocate input and output OCL buffers valuesCl = allocateZeroCopyBuffer(rEnv, tiffdata, w*h, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, &clStatus); outputCl = allocateZeroCopyBuffer(rEnv, pResult, w*h, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, &clStatus); //Kernel arguments - clStatus = 
clSetKernelArg( rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl ); + clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &valuesCl); CHECK_OPENCL( clStatus, "clSetKernelArg"); - clStatus = clSetKernelArg( rEnv.mpkKernel, 1, sizeof(w), &w ); + clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(w), &w); CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 2, sizeof(h), &h ); + clStatus = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(h), &h); CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 3, sizeof(wpl), &wpl ); + clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); CHECK_OPENCL( clStatus, "clSetKernelArg" ); - clStatus = clSetKernelArg( rEnv.mpkKernel, 4, sizeof(cl_mem), &outputCl ); + clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outputCl); CHECK_OPENCL( clStatus, "clSetKernelArg"); //Kernel enqueue PERF_COUNT_SUB("before") - clStatus = clEnqueueNDRangeKernel( rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel" ); - - /* map results back from gpu */ - void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ, 0, w*h * sizeof(l_uint32), 0, NULL, NULL, &clStatus); - CHECK_OPENCL( clStatus, "clEnqueueMapBuffer outputCl"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, NULL, NULL); - - //Sync - clFinish( rEnv.mpkCmdQueue ); +clStatus = + clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); +CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel"); + +/* map results back from gpu */ +void *ptr = clEnqueueMapBuffer(rEnv.mpkCmdQueue, outputCl, CL_TRUE, CL_MAP_READ, + 0, w * h * sizeof(l_uint32), 0, NULL, NULL, + &clStatus); +CHECK_OPENCL(clStatus, "clEnqueueMapBuffer outputCl"); +clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, outputCl, ptr, 0, NULL, NULL); + +// Sync +clFinish(rEnv.mpkCmdQueue); 
PERF_COUNT_SUB("kernel & map") PERF_COUNT_END return pResult; @@ -811,13 +819,13 @@ PIX *pix; PROCNAME("pixReadTiff"); if (!filename) - return (PIX *)ERROR_PTR("filename not defined", procName, NULL); + return (PIX *)ERROR_PTR("filename not defined", procName, NULL); if ((fp = fopenReadStream(filename)) == NULL) - return (PIX *)ERROR_PTR("image file not found", procName, NULL); + return (PIX *)ERROR_PTR("image file not found", procName, NULL); if ((pix = pixReadStreamTiffCl(fp, n)) == NULL) { - fclose(fp); - return (PIX *)ERROR_PTR("pix not read", procName, NULL); + fclose(fp); + return (PIX *)ERROR_PTR("pix not read", procName, NULL); } fclose(fp); PERF_COUNT_END @@ -831,13 +839,12 @@ l_int32 fd; PROCNAME("fopenTiff"); - if (!fp) - return (TIFF *)ERROR_PTR("stream not opened", procName, NULL); + if (!fp) return (TIFF *)ERROR_PTR("stream not opened", procName, NULL); if (!modestring) - return (TIFF *)ERROR_PTR("modestring not defined", procName, NULL); + return (TIFF *)ERROR_PTR("modestring not defined", procName, NULL); if ((fd = fileno(fp)) < 0) - return (TIFF *)ERROR_PTR("invalid file descriptor", procName, NULL); + return (TIFF *)ERROR_PTR("invalid file descriptor", procName, NULL); lseek(fd, 0, SEEK_SET); return TIFFFdOpen(fd, "TIFFstream", modestring); @@ -1087,26 +1094,26 @@ size_t *pdatasize) PROCNAME("fopenTiffMemstream"); if (!filename) - return (TIFF *)ERROR_PTR("filename not defined", procName, NULL); - if (!operation) - return (TIFF *)ERROR_PTR("operation not defined", procName, NULL); - if (!pdata) - return (TIFF *)ERROR_PTR("&data not defined", procName, NULL); - if (!pdatasize) - return (TIFF *)ERROR_PTR("&datasize not defined", procName, NULL); - if (!strcmp(operation, "r") && !strcmp(operation, "w")) - return (TIFF *)ERROR_PTR("operation not 'r' or 'w'}", procName, NULL); - - if (!strcmp(operation, "r")) - mstream = memstreamCreateForRead(*pdata, *pdatasize); - else - mstream = memstreamCreateForWrite(pdata, pdatasize); + return (TIFF 
*)ERROR_PTR("filename not defined", procName, NULL); + if (!operation) + return (TIFF *)ERROR_PTR("operation not defined", procName, NULL); + if (!pdata) + return (TIFF *)ERROR_PTR("&data not defined", procName, NULL); + if (!pdatasize) + return (TIFF *)ERROR_PTR("&datasize not defined", procName, NULL); + if (!strcmp(operation, "r") && !strcmp(operation, "w")) + return (TIFF *)ERROR_PTR("operation not 'r' or 'w'}", procName, + NULL); + + if (!strcmp(operation, "r")) + mstream = memstreamCreateForRead(*pdata, *pdatasize); + else + mstream = memstreamCreateForWrite(pdata, pdatasize); - return TIFFClientOpen(filename, operation, mstream, - tiffReadCallback, tiffWriteCallback, - tiffSeekCallback, tiffCloseCallback, - tiffSizeCallback, tiffMapCallback, - tiffUnmapCallback); + return TIFFClientOpen(filename, operation, mstream, tiffReadCallback, + tiffWriteCallback, tiffSeekCallback, + tiffCloseCallback, tiffSizeCallback, + tiffMapCallback, tiffUnmapCallback); } @@ -1161,11 +1168,10 @@ TIFF *tif; PROCNAME("pixReadStreamTiff"); - if (!fp) - return (PIX *)ERROR_PTR("stream not defined", procName, NULL); + if (!fp) return (PIX *)ERROR_PTR("stream not defined", procName, NULL); if ((tif = fopenTiffCl(fp, "rb")) == NULL) - return (PIX *)ERROR_PTR("tif not opened", procName, NULL); + return (PIX *)ERROR_PTR("tif not opened", procName, NULL); pagefound = FALSE; pix = NULL; @@ -1173,8 +1179,8 @@ TIFF *tif; if (i == n) { pagefound = TRUE; if ((pix = pixReadFromTiffStreamCl(tif)) == NULL) { - TIFFCleanup(tif); - return (PIX *)ERROR_PTR("pix not read", procName, NULL); + TIFFCleanup(tif); + return (PIX *)ERROR_PTR("pix not read", procName, NULL); } break; } @@ -1254,43 +1260,43 @@ PIXCMAP *cmap; PROCNAME("pixReadFromTiffStream"); - if (!tif) - return (PIX *)ERROR_PTR("tif not defined", procName, NULL); + if (!tif) return (PIX *)ERROR_PTR("tif not defined", procName, NULL); TIFFGetFieldDefaulted(tif, TIFFTAG_BITSPERSAMPLE, &bps); TIFFGetFieldDefaulted(tif, TIFFTAG_SAMPLESPERPIXEL, 
&spp); bpp = bps * spp; if (bpp > 32) - return (PIX *)ERROR_PTR("can't handle bpp > 32", procName, NULL); + return (PIX *)ERROR_PTR("can't handle bpp > 32", procName, NULL); if (spp == 1) d = bps; else if (spp == 3 || spp == 4) d = 32; else - return (PIX *)ERROR_PTR("spp not in set {1,3,4}", procName, NULL); + return (PIX *)ERROR_PTR("spp not in set {1,3,4}", procName, NULL); TIFFGetField(tif, TIFFTAG_IMAGEWIDTH, &w); TIFFGetField(tif, TIFFTAG_IMAGELENGTH, &h); tiffbpl = TIFFScanlineSize(tif); if ((pix = pixCreate(w, h, d)) == NULL) - return (PIX *)ERROR_PTR("pix not made", procName, NULL); + return (PIX *)ERROR_PTR("pix not made", procName, NULL); data = (l_uint8 *)pixGetData(pix); wpl = pixGetWpl(pix); bpl = 4 * wpl; if (spp == 1) { - if ((linebuf = (l_uint8 *)CALLOC(tiffbpl + 1, sizeof(l_uint8))) == NULL) - return (PIX *)ERROR_PTR("calloc fail for linebuf", procName, NULL); - - for (i = 0 ; i < h ; i++) { - if (TIFFReadScanline(tif, linebuf, i, 0) < 0) { - FREE(linebuf); - pixDestroy(&pix); - return (PIX *)ERROR_PTR("line read fail", procName, NULL); - } - memcpy((char *)data, (char *)linebuf, tiffbpl); - data += bpl; + if ((linebuf = (l_uint8 *)CALLOC(tiffbpl + 1, sizeof(l_uint8))) == + NULL) + return (PIX *)ERROR_PTR("calloc fail for linebuf", procName, NULL); + + for (i = 0; i < h; i++) { + if (TIFFReadScanline(tif, linebuf, i, 0) < 0) { + FREE(linebuf); + pixDestroy(&pix); + return (PIX *)ERROR_PTR("line read fail", procName, NULL); + } + memcpy((char *)data, (char *)linebuf, tiffbpl); + data += bpl; } if (bps <= 8) pixEndianByteSwap(pix); @@ -1331,9 +1337,9 @@ PIXCMAP *cmap; pixSetInputFormat(pix, comptype); if (TIFFGetField(tif, TIFFTAG_COLORMAP, &redmap, &greenmap, &bluemap)) { - if ((cmap = pixcmapCreate(bps)) == NULL) { - pixDestroy(&pix); - return (PIX *)ERROR_PTR("cmap not made", procName, NULL); + if ((cmap = pixcmapCreate(bps)) == NULL) { + pixDestroy(&pix); + return (PIX *)ERROR_PTR("cmap not made", procName, NULL); } ncolors = 1 << bps; for (i = 
0; i < ncolors; i++) @@ -1400,24 +1406,12 @@ pixDilateCL_55(l_int32 wpl, l_int32 h) 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(wpl), - &wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(h), - &h); - - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); + + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); //Swap source and dest buffers pixtemp = pixsCLBuffer; @@ -1443,23 +1437,11 @@ pixDilateCL_55(l_int32 wpl, l_int32 h) 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(wpl), - &wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(h), - &h); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); return status; } @@ -1496,24 +1478,12 @@ pixErodeCL_55(l_int32 wpl, l_int32 h) 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(wpl), - &wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(h), - &h); - - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); + + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); //Swap source and 
dest buffers pixtemp = pixsCLBuffer; @@ -1539,31 +1509,13 @@ pixErodeCL_55(l_int32 wpl, l_int32 h) 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(wpl), - &wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(h), - &h); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(fwmask), - &fwmask); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(lwmask), - &lwmask); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(fwmask), &fwmask); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(lwmask), &lwmask); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); return status; } @@ -1613,12 +1565,11 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn); - status = - clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl); status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h); - status = - clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, - globalThreads, localThreads, 0, NULL, NULL); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); if (yp > 0 || yn > 0) { pixtemp = pixsCLBuffer; @@ -1637,14 +1588,12 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &pixsCLBuffer); status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &pixdCLBuffer); status 
= clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); - status = - clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), - &isEven); - status = - clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, - globalThreads, localThreads, 0, NULL, NULL); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isEven), &isEven); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); if (yp > 0 || yn > 0) { pixtemp = pixsCLBuffer; @@ -1666,31 +1615,13 @@ pixDilateCL(l_int32 hsize, l_int32 vsize, l_int32 wpl, l_int32 h) 1, sizeof(cl_mem), &pixdCLBuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(yp), - &yp); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(wpl), - &wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(h), - &h); - status = clSetKernelArg(rEnv.mpkKernel, - 5, - sizeof(yn), - &yn); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), &yp); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); + status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(yn), &yn); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); } return status; @@ -1743,14 +1674,13 @@ cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(xn), &xn); status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(wpl), &wpl); status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(isAsymmetric), - &isAsymmetric); - status 
= clSetKernelArg(rEnv.mpkKernel, 7, sizeof(rwmask), - &rwmask); - status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(lwmask), - &lwmask); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, - globalThreads, localThreads, 0, NULL, NULL); + status = + clSetKernelArg(rEnv.mpkKernel, 6, sizeof(isAsymmetric), &isAsymmetric); + status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(rwmask), &rwmask); + status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(lwmask), &lwmask); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); if (yp > 0 || yn > 0) { pixtemp = pixsCLBuffer; @@ -1767,16 +1697,14 @@ cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(xp), &xp); status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), - &isAsymmetric); - status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(rwmask), - &rwmask); - status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(lwmask), - &lwmask); - status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(isEven), - &isEven); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, - globalThreads, localThreads, 0, NULL, NULL); + status = + clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), &isAsymmetric); + status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(rwmask), &rwmask); + status = clSetKernelArg(rEnv.mpkKernel, 7, sizeof(lwmask), &lwmask); + status = clSetKernelArg(rEnv.mpkKernel, 8, sizeof(isEven), &isEven); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); if (yp > 0 || yn > 0) { pixtemp = pixsCLBuffer; @@ -1795,11 +1723,12 @@ cl_int pixErodeCL(l_int32 hsize, l_int32 vsize, l_uint32 wpl, l_uint32 h) { status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(yp), 
&yp); status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); - status = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), - &isAsymmetric); + status = + clSetKernelArg(rEnv.mpkKernel, 5, sizeof(isAsymmetric), &isAsymmetric); status = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(yn), &yn); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, - globalThreads, localThreads, 0, NULL, NULL); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); } return status; @@ -1948,23 +1877,11 @@ pixORCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem ou 2, sizeof(cl_mem), &outbuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(wpl), - &wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(h), - &h); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); return status; } @@ -1999,87 +1916,50 @@ pixANDCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem o 2, sizeof(cl_mem), &outbuffer); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(wpl), - &wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(h), - &h); - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(h), &h); + status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, + NULL, globalThreads, localThreads, 0, + NULL, NULL); return status; } //output 
= buffer1 & ~(buffer2) -cl_int -pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, cl_mem buffer2, cl_mem outBuffer = NULL) -{ - cl_int status; - size_t globalThreads[2]; - int gsize; - size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y}; +cl_int pixSubtractCL_work(l_uint32 wpl, l_uint32 h, cl_mem buffer1, + cl_mem buffer2, cl_mem outBuffer = NULL) { + cl_int status; + size_t globalThreads[2]; + int gsize; + size_t localThreads[] = {GROUPSIZE_X, GROUPSIZE_Y}; - gsize = (wpl + GROUPSIZE_X - 1)/ GROUPSIZE_X * GROUPSIZE_X; - globalThreads[0] = gsize; - gsize = (h + GROUPSIZE_Y - 1)/ GROUPSIZE_Y * GROUPSIZE_Y; - globalThreads[1] = gsize; + gsize = (wpl + GROUPSIZE_X - 1) / GROUPSIZE_X * GROUPSIZE_X; + globalThreads[0] = gsize; + gsize = (h + GROUPSIZE_Y - 1) / GROUPSIZE_Y * GROUPSIZE_Y; + globalThreads[1] = gsize; - if (outBuffer != NULL) - { - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixSubtract", &status ); - CHECK_OPENCL(status, "clCreateKernel pixSubtract"); - } - else - { - rEnv.mpkKernel = clCreateKernel( rEnv.mpkProgram, "pixSubtract_inplace", &status ); - CHECK_OPENCL(status, "clCreateKernel pixSubtract_inplace"); - } + if (outBuffer != NULL) { + rEnv.mpkKernel = clCreateKernel(rEnv.mpkProgram, "pixSubtract", &status); + CHECK_OPENCL(status, "clCreateKernel pixSubtract"); + } else { + rEnv.mpkKernel = + clCreateKernel(rEnv.mpkProgram, "pixSubtract_inplace", &status); + CHECK_OPENCL(status, "clCreateKernel pixSubtract_inplace"); + } - // Enqueue a kernel run call. 
- status = clSetKernelArg(rEnv.mpkKernel, - 0, - sizeof(cl_mem), - &buffer1); - status = clSetKernelArg(rEnv.mpkKernel, - 1, - sizeof(cl_mem), - &buffer2); - status = clSetKernelArg(rEnv.mpkKernel, - 2, - sizeof(wpl), - &wpl); - status = clSetKernelArg(rEnv.mpkKernel, - 3, - sizeof(h), - &h); - if (outBuffer != NULL) - { - status = clSetKernelArg(rEnv.mpkKernel, - 4, - sizeof(cl_mem), - &outBuffer); - } - status = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, - rEnv.mpkKernel, - 2, - NULL, - globalThreads, - localThreads, - 0, - NULL, - NULL); + // Enqueue a kernel run call. + status = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &buffer1); + status = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(cl_mem), &buffer2); + status = clSetKernelArg(rEnv.mpkKernel, 2, sizeof(wpl), &wpl); + status = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(h), &h); + if (outBuffer != NULL) { + status = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &outBuffer); + } + status = + clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 2, NULL, + globalThreads, localThreads, 0, NULL, NULL); - return status; + return status; } // OpenCL implementation of Subtract pix @@ -2260,8 +2140,7 @@ int OpenclDevice::HistogramRectOCL(unsigned char *imageData, // using a garlic bus memory type cl_mem imageBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - width * height * bytes_per_pixel * sizeof(char), imageData, - &clStatus); + width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); /* setup work group size parameters */ @@ -2284,8 +2163,8 @@ int OpenclDevice::HistogramRectOCL(unsigned char *imageData, cl_mem histogramBuffer = clCreateBuffer( histKern.mpkContext, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, - kHistogramSize * bytes_per_pixel * sizeof(int), - histogramAllChannels, &clStatus); + kHistogramSize * bytes_per_pixel * sizeof(int), histogramAllChannels, + &clStatus); CHECK_OPENCL(clStatus, 
"clCreateBuffer histogramBuffer"); /* intermediate histogram buffer */ @@ -2327,48 +2206,53 @@ int OpenclDevice::HistogramRectOCL(unsigned char *imageData, void *ptr; //Initialize tmpHistogramBuffer buffer - ptr = clEnqueueMapBuffer(histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0, tmpHistogramBins*sizeof(cl_uint), 0, NULL, NULL, &clStatus); + ptr = clEnqueueMapBuffer( + histKern.mpkCmdQueue, tmpHistogramBuffer, CL_TRUE, CL_MAP_WRITE, 0, + tmpHistogramBins * sizeof(cl_uint), 0, NULL, NULL, &clStatus); CHECK_OPENCL( clStatus, "clEnqueueMapBuffer tmpHistogramBuffer"); memset(ptr, 0, tmpHistogramBins*sizeof(cl_uint)); - clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0, NULL, NULL); + clEnqueueUnmapMemObject(histKern.mpkCmdQueue, tmpHistogramBuffer, ptr, 0, + NULL, NULL); /* set kernel 1 arguments */ - clStatus = clSetKernelArg( histKern.mpkKernel, 0, sizeof(cl_mem), &imageBuffer ); + clStatus = + clSetKernelArg(histKern.mpkKernel, 0, sizeof(cl_mem), &imageBuffer); CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer"); cl_uint numPixels = width*height; - clStatus = clSetKernelArg( histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels ); + clStatus = + clSetKernelArg(histKern.mpkKernel, 1, sizeof(cl_uint), &numPixels); CHECK_OPENCL( clStatus, "clSetKernelArg numPixels" ); - clStatus = clSetKernelArg( histKern.mpkKernel, 2, sizeof(cl_mem), &tmpHistogramBuffer ); + clStatus = clSetKernelArg(histKern.mpkKernel, 2, sizeof(cl_mem), + &tmpHistogramBuffer); CHECK_OPENCL( clStatus, "clSetKernelArg tmpHistogramBuffer"); /* set kernel 2 arguments */ int n = numThreads/bytes_per_pixel; - clStatus = clSetKernelArg( histRedKern.mpkKernel, 0, sizeof(cl_int), &n ); + clStatus = clSetKernelArg(histRedKern.mpkKernel, 0, sizeof(cl_int), &n); CHECK_OPENCL( clStatus, "clSetKernelArg imageBuffer"); - clStatus = clSetKernelArg( histRedKern.mpkKernel, 1, sizeof(cl_mem), &tmpHistogramBuffer ); + clStatus = clSetKernelArg(histRedKern.mpkKernel, 1, 
sizeof(cl_mem), + &tmpHistogramBuffer); CHECK_OPENCL( clStatus, "clSetKernelArg tmpHistogramBuffer"); - clStatus = clSetKernelArg( histRedKern.mpkKernel, 2, sizeof(cl_mem), &histogramBuffer ); + clStatus = clSetKernelArg(histRedKern.mpkKernel, 2, sizeof(cl_mem), + &histogramBuffer); CHECK_OPENCL( clStatus, "clSetKernelArg histogramBuffer"); /* launch histogram */ PERF_COUNT_SUB("before") - clStatus = clEnqueueNDRangeKernel( - histKern.mpkCmdQueue, - histKern.mpkKernel, - 1, NULL, global_work_size, local_work_size, - 0, NULL, NULL ); - CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels" ); - clFinish( histKern.mpkCmdQueue ); - if (clStatus != 0) { - retVal = -1; +clStatus = clEnqueueNDRangeKernel(histKern.mpkCmdQueue, histKern.mpkKernel, 1, + NULL, global_work_size, local_work_size, 0, + NULL, NULL); +CHECK_OPENCL(clStatus, + "clEnqueueNDRangeKernel kernel_HistogramRectAllChannels"); +clFinish(histKern.mpkCmdQueue); +if (clStatus != 0) { + retVal = -1; } /* launch histogram */ clStatus = clEnqueueNDRangeKernel( - histRedKern.mpkCmdQueue, - histRedKern.mpkKernel, - 1, NULL, red_global_work_size, local_work_size, - 0, NULL, NULL ); + histRedKern.mpkCmdQueue, histRedKern.mpkKernel, 1, NULL, + red_global_work_size, local_work_size, 0, NULL, NULL); CHECK_OPENCL( clStatus, "clEnqueueNDRangeKernel kernel_HistogramRectAllChannelsReduction" ); clFinish( histRedKern.mpkCmdQueue ); if (clStatus != 0) { @@ -2377,12 +2261,16 @@ PERF_COUNT_SUB("before") PERF_COUNT_SUB("redKernel") /* map results back from gpu */ - ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, CL_MAP_READ, 0, kHistogramSize*bytes_per_pixel*sizeof(int), 0, NULL, NULL, &clStatus); + ptr = clEnqueueMapBuffer(histRedKern.mpkCmdQueue, histogramBuffer, CL_TRUE, + CL_MAP_READ, 0, + kHistogramSize * bytes_per_pixel * sizeof(int), 0, + NULL, NULL, &clStatus); CHECK_OPENCL( clStatus, "clEnqueueMapBuffer histogramBuffer"); if (clStatus != 0) { retVal = -1; } - 
clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, NULL, NULL); + clEnqueueUnmapMemObject(histRedKern.mpkCmdQueue, histogramBuffer, ptr, 0, + NULL, NULL); clReleaseMemObject(histogramBuffer); clReleaseMemObject(imageBuffer); @@ -2398,10 +2286,9 @@ return retVal; ************************************************************************/ int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, int bytes_per_pixel, int bytes_per_line, - int *thresholds, - int *hi_values, Pix **pix, - int height, int width, int top, - int left) { + int *thresholds, int *hi_values, + Pix **pix, int height, int width, + int top, int left) { PERF_COUNT_START("ThresholdRectToPixOCL") int retVal = 0; /* create pix result buffer */ @@ -2432,10 +2319,9 @@ int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, // coherent which we don't need. // faster option would be to allocate initial image buffer // using a garlic bus memory type - cl_mem imageBuffer = - clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - width * height * bytes_per_pixel * sizeof(char), - imageData, &clStatus); + cl_mem imageBuffer = clCreateBuffer( + rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + width * height * bytes_per_pixel * sizeof(char), imageData, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer imageBuffer"); /* map pix as write only */ @@ -2445,13 +2331,13 @@ int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, CHECK_OPENCL(clStatus, "clCreateBuffer pix"); /* map thresholds and hi_values */ - cl_mem thresholdsBuffer = clCreateBuffer( - rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - bytes_per_pixel * sizeof(int), thresholds, &clStatus); + cl_mem thresholdsBuffer = + clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + bytes_per_pixel * sizeof(int), thresholds, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer thresholdBuffer"); - cl_mem hiValuesBuffer = clCreateBuffer( - rEnv.mpkContext, 
CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - bytes_per_pixel * sizeof(int), hi_values, &clStatus); + cl_mem hiValuesBuffer = + clCreateBuffer(rEnv.mpkContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + bytes_per_pixel * sizeof(int), hi_values, &clStatus); CHECK_OPENCL(clStatus, "clCreateBuffer hiValuesBuffer"); /* compile kernel */ @@ -2466,8 +2352,7 @@ int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, } /* set kernel arguments */ - clStatus = - clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &imageBuffer); + clStatus = clSetKernelArg(rEnv.mpkKernel, 0, sizeof(cl_mem), &imageBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg imageBuffer"); cl_uint numPixels = width * height; clStatus = clSetKernelArg(rEnv.mpkKernel, 1, sizeof(int), &height); @@ -2476,21 +2361,19 @@ int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, CHECK_OPENCL(clStatus, "clSetKernelArg width"); clStatus = clSetKernelArg(rEnv.mpkKernel, 3, sizeof(int), &wpl); CHECK_OPENCL(clStatus, "clSetKernelArg wpl"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), - &thresholdsBuffer); + clStatus = + clSetKernelArg(rEnv.mpkKernel, 4, sizeof(cl_mem), &thresholdsBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg thresholdsBuffer"); - clStatus = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(cl_mem), - &hiValuesBuffer); + clStatus = clSetKernelArg(rEnv.mpkKernel, 5, sizeof(cl_mem), &hiValuesBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg hiValuesBuffer"); - clStatus = - clSetKernelArg(rEnv.mpkKernel, 6, sizeof(cl_mem), &pixThBuffer); + clStatus = clSetKernelArg(rEnv.mpkKernel, 6, sizeof(cl_mem), &pixThBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg pixThBuffer"); /* launch kernel & wait */ PERF_COUNT_SUB("before") - clStatus = - clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 1, NULL, - global_work_size, local_work_size, 0, NULL, NULL); + clStatus = clEnqueueNDRangeKernel(rEnv.mpkCmdQueue, rEnv.mpkKernel, 1, + NULL, global_work_size, local_work_size, + 0, NULL, NULL); 
CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_ThresholdRectToPix"); clFinish(rEnv.mpkCmdQueue); PERF_COUNT_SUB("kernel") @@ -2503,7 +2386,8 @@ int OpenclDevice::ThresholdRectToPixOCL(unsigned char *imageData, clEnqueueMapBuffer(rEnv.mpkCmdQueue, pixThBuffer, CL_TRUE, CL_MAP_READ, 0, pixSize, 0, NULL, NULL, &clStatus); CHECK_OPENCL(clStatus, "clEnqueueMapBuffer histogramBuffer"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL, NULL); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, pixThBuffer, ptr, 0, NULL, + NULL); clReleaseMemObject(imageBuffer); clReleaseMemObject(thresholdsBuffer); @@ -2644,7 +2528,8 @@ double composeRGBPixelMicroBench( GPUEnv *env, TessScoreEvaluationInputData inpu OpenclDevice::gpuEnv = *env; int wpl = pixGetWpl(input.pix); - OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height, wpl, NULL); + OpenclDevice::pixReadFromTiffKernel(tiffdata, input.width, input.height, + wpl, NULL); #if ON_WINDOWS QueryPerformanceCounter(&time_funct_end); time = (time_funct_end.QuadPart-time_funct_start.QuadPart)/(double)(freq.QuadPart); @@ -2796,13 +2681,13 @@ void ThresholdRectToPix_Native(const unsigned char* imagedata, int height = pixGetHeight(*pix); *pix = pixCreate(width, height, 1); - uinT32* pixdata = pixGetData(*pix); + uinT32 *pixdata = pixGetData(*pix); int wpl = pixGetWpl(*pix); const unsigned char* srcdata = imagedata + top * bytes_per_line + left * bytes_per_pixel; for (int y = 0; y < height; ++y) { - const uinT8* linedata = srcdata; - uinT32* pixline = pixdata + y * wpl; + const uinT8 *linedata = srcdata; + uinT32 *pixline = pixdata + y * wpl; for (int x = 0; x < width; ++x, linedata += bytes_per_pixel) { bool white_result = true; for (int ch = 0; ch < bytes_per_pixel; ++ch) { @@ -2953,7 +2838,10 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in OpenclDevice::gpuEnv = *env; OpenclDevice::initMorphCLAllocations(wpl, input.height, input.pix); Pix *pix_vline = NULL, 
*pix_hline = NULL, *pix_closed = NULL; - OpenclDevice::pixGetLinesCL(NULL, input.pix, &pix_vline, &pix_hline, &pix_closed, true, closing_brick, closing_brick, max_line_width, max_line_width, min_line_length, min_line_length); + OpenclDevice::pixGetLinesCL( + NULL, input.pix, &pix_vline, &pix_hline, &pix_closed, true, + closing_brick, closing_brick, max_line_width, max_line_width, + min_line_length, min_line_length); OpenclDevice::releaseMorphCLBuffers(); @@ -2978,8 +2866,10 @@ double getLineMasksMorphMicroBench( GPUEnv *env, TessScoreEvaluationInputData in // native serial code Pix *src_pix = input.pix; - Pix *pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick); - Pix *pix_solid = pixOpenBrick(NULL, pix_closed, max_line_width, max_line_width); + Pix *pix_closed = + pixCloseBrick(NULL, src_pix, closing_brick, closing_brick); + Pix *pix_solid = + pixOpenBrick(NULL, pix_closed, max_line_width, max_line_width); Pix *pix_hollow = pixSubtract(NULL, pix_closed, pix_solid); pixDestroy(&pix_solid); Pix *pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length); @@ -3112,9 +3002,8 @@ ds_device OpenclDevice::getDeviceSelection( ) { // PERF_COUNT_SUB("populateTessScoreEvaluationInputData") // perform evaluations unsigned int numUpdates; - status = - profileDevices(profile, DS_EVALUATE_ALL, evaluateScoreForDevice, - &input, &numUpdates); + status = profileDevices(profile, DS_EVALUATE_ALL, + evaluateScoreForDevice, &input, &numUpdates); PERF_COUNT_SUB("profileDevices") // write scores to file if (status == DS_SUCCESS) { @@ -3301,11 +3190,9 @@ Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix, // 32-bit source CHECK_OPENCL(clStatus, "clCreateKernel kernel_RGBToGray"); /* set kernel arguments */ - clStatus = - clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), &srcBuffer); + clStatus = clSetKernelArg(kEnv.mpkKernel, 0, sizeof(cl_mem), &srcBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg srcBuffer"); - clStatus = - clSetKernelArg(kEnv.mpkKernel, 1, 
sizeof(cl_mem), &dstBuffer); + clStatus = clSetKernelArg(kEnv.mpkKernel, 1, sizeof(cl_mem), &dstBuffer); CHECK_OPENCL(clStatus, "clSetKernelArg dstBuffer"); clStatus = clSetKernelArg(kEnv.mpkKernel, 2, sizeof(int), &srcWPL); CHECK_OPENCL(clStatus, "clSetKernelArg srcWPL"); @@ -3324,9 +3211,9 @@ Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix, // 32-bit source /* launch kernel & wait */ PERF_COUNT_SUB("before") - clStatus = - clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, NULL, - global_work_size, local_work_size, 0, NULL, NULL); + clStatus = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, kEnv.mpkKernel, 1, + NULL, global_work_size, local_work_size, + 0, NULL, NULL); CHECK_OPENCL(clStatus, "clEnqueueNDRangeKernel kernel_RGBToGray"); clFinish(kEnv.mpkCmdQueue); PERF_COUNT_SUB("kernel") @@ -3336,7 +3223,8 @@ Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix, // 32-bit source clEnqueueMapBuffer(kEnv.mpkCmdQueue, dstBuffer, CL_TRUE, CL_MAP_READ, 0, dstSize, 0, NULL, NULL, &clStatus); CHECK_OPENCL(clStatus, "clEnqueueMapBuffer dstBuffer"); - clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, NULL, NULL); + clEnqueueUnmapMemObject(rEnv.mpkCmdQueue, dstBuffer, ptr, 0, NULL, + NULL); #if 0 // validate: compute on cpu @@ -3381,4 +3269,4 @@ Pix *OpenclDevice::pixConvertRGBToGrayOCL(Pix *srcPix, // 32-bit source // success return dstPix; } -#endif +#endif \ No newline at end of file diff --git a/opencl/openclwrapper.h b/opencl/openclwrapper.h index 0fbaf89d7d..e09e371dbb 100644 --- a/opencl/openclwrapper.h +++ b/opencl/openclwrapper.h @@ -298,15 +298,14 @@ class OpenclDevice inline static int AddKernelConfig( int kCount, const char *kName ); /* for binarization */ - static int HistogramRectOCL(unsigned char *imagedata, - int bytes_per_pixel, int bytes_per_line, - int left, int top, int width, int height, - int kHistogramSize, int *histogramAllChannels); + static int HistogramRectOCL(unsigned char *imagedata, int bytes_per_pixel, + int bytes_per_line, 
int left, int top, + int width, int height, int kHistogramSize, + int *histogramAllChannels); static int ThresholdRectToPixOCL(unsigned char *imagedata, int bytes_per_pixel, int bytes_per_line, - int *thresholds, - int *hi_values, Pix **pix, + int *thresholds, int *hi_values, Pix **pix, int rect_height, int rect_width, int rect_top, int rect_left); diff --git a/textord/blkocc.h b/textord/blkocc.h index d80afe25f3..f27bb9a5a5 100644 --- a/textord/blkocc.h +++ b/textord/blkocc.h @@ -52,9 +52,8 @@ class REGION_OCC:public ELIST_LINK float max_x; //Highest x in region inT16 region_type; //Type of crossing - REGION_OCC() { - } //constructor used - //only in COPIER etc + REGION_OCC() {} // constructor used + // only in COPIER etc REGION_OCC( //constructor float min, float max, From ba5a17b4ba5b532c1d49eefad58b660f7bcb022c Mon Sep 17 00:00:00 2001 From: Philipp Nordhus Date: Fri, 17 Jun 2016 02:31:20 +0200 Subject: [PATCH 086/132] Remove duplicate destructor Destructor of base class GenericVector calls base class clear() method, deallocating the memory. --- dict/dawg.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dict/dawg.h b/dict/dawg.h index c7169167d8..bd789f3722 100644 --- a/dict/dawg.h +++ b/dict/dawg.h @@ -375,14 +375,6 @@ struct DawgPosition { class DawgPositionVector : public GenericVector { public: - /// Overload destructor, since clear() does not delete data_[] any more. - ~DawgPositionVector() { - if (size_reserved_ > 0) { - delete[] data_; - size_used_ = 0; - size_reserved_ = 0; - } - } /// Overload clear() in order to avoid allocating/deallocating memory /// when clearing the vector and re-inserting entries into it later. void clear() { size_used_ = 0; } From bdb690ba06ede3485fe553a2621c29c6a86df0d6 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Wed, 7 Dec 2016 13:21:05 -0800 Subject: [PATCH 087/132] Implement a new orientation and script detection API for C and C++ See issue #424. 
The existing C API for TessBaseAPIDetectOS requires a C caller to successfully allocate struct OSResults which is actually a C++ class. Generally it won't be possible for a regular C compiler to do this properly. It's also assumed that most API level users of Tesseract are only interested in Tesseract's best guess as to script and orientation, not the individual values for all possible scripts. This introduces a new API with a better name that is more closely aligned with the output of 'tesseract -psm 0'. Both tesseract -psm 0 and this API now share the same code in baseapi.cpp. --- api/baseapi.cpp | 37 +++++++++++++++++++++++++------------ api/baseapi.h | 10 ++++++++++ api/capi.cpp | 31 ++++++++++++++++++++++++++++++- api/capi.h | 7 +++++++ 4 files changed, 72 insertions(+), 13 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index a5540aeeaa..6522470771 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1888,31 +1888,44 @@ char* TessBaseAPI::GetUNLVText() { return result; } -/** - * The recognized text is returned as a char* which is coded - * as UTF8 and must be freed with the delete [] operator. - * page_number is a 0-based page index that will appear in the osd file. 
- */ -char* TessBaseAPI::GetOsdText(int page_number) { +bool TessBaseAPI::DetectOrientationScript(int& orient_deg, float& orient_conf, std::string& script, float& script_conf) { OSResults osr; bool osd = DetectOS(&osr); if (!osd) { - return NULL; + return false; } int orient_id = osr.best_result.orientation_id; int script_id = osr.get_best_script(orient_id); - float orient_conf = osr.best_result.oconfidence; - float script_conf = osr.best_result.sconfidence; + orient_conf = osr.best_result.oconfidence; + script_conf = osr.best_result.sconfidence; const char* script_name = osr.unicharset->get_script_from_script_id(script_id); // clockwise orientation of the input image, in degrees - int orient_deg = orient_id * 90; + orient_deg = orient_id * 90; + + script = script_name; + return true; +} + +/** + * The recognized text is returned as a char* which is coded + * as UTF8 and must be freed with the delete [] operator. + * page_number is a 0-based page index that will appear in the osd file. + */ +char* TessBaseAPI::GetOsdText(int page_number) { + int orient_deg; + float orient_conf; + std::string script_name; + float script_conf; + + if (!DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf)) + return NULL; // clockwise rotation needed to make the page upright - int rotate = OrientationIdToValue(orient_id); + int rotate = OrientationIdToValue(orient_deg / 90); const int kOsdBufsize = 255; char* osd_buf = new char[kOsdBufsize]; @@ -1923,7 +1936,7 @@ char* TessBaseAPI::GetOsdText(int page_number) { "Orientation confidence: %.2f\n" "Script: %s\n" "Script confidence: %.2f\n", - page_number, orient_deg, rotate, orient_conf, script_name, + page_number, orient_deg, rotate, orient_conf, script_name.c_str(), script_conf); return osd_buf; diff --git a/api/baseapi.h b/api/baseapi.h index 65afa36269..2e1a989acd 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -26,6 +26,7 @@ (patch)) #include +#include // To avoid collision with other typenames include the 
ABSOLUTE MINIMUM // complexity of includes here. Use forward declarations wherever possible // and hide includes of complex types in baseapi.cpp. @@ -618,6 +619,15 @@ class TESS_API TessBaseAPI { */ char* GetUNLVText(); + /** + * Detect the orientation of the input image and apparent script (alphabet). + * orient_deg is the detected clockwise rotation of the input image + * orient_conf is the confidence (15.0 is reasonable) + * script is an ASCII string, the name of the script, e.g. "Latin" + * script_conf is confidence level in the script + */ + bool DetectOrientationScript(int& orient_deg, float& orient_conf, std::string& script, float& script_conf); + /** * The recognized text is returned as a char* which is coded * as UTF8 and must be freed with the delete [] operator. diff --git a/api/capi.cpp b/api/capi.cpp index 849d296104..7e1fe3665a 100644 --- a/api/capi.cpp +++ b/api/capi.cpp @@ -538,9 +538,38 @@ TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* hand TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results) { - return handle->DetectOS(results) ? 
TRUE : FALSE; + return FALSE; // Unsafe ABI, return FALSE always } +TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, char** best_script_name, + int* best_orientation_deg, float* script_confidence, + float* orientation_confidence) +{ + int orient_deg; + float orient_conf; + std::string script_name; + float script_conf; + BOOL success; + + success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf); + if (!success) + return FALSE; + if (best_script_name) { + *best_script_name = new char [script_name.length() + 1]; + strcpy(*best_script_name, script_name.c_str()); + } + + if (best_orientation_deg) + *best_orientation_deg = orient_deg; + if (script_confidence) + *script_confidence = script_conf; + if (orientation_confidence) + *orientation_confidence = orient_conf; + + return TRUE; +} + + TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, int* num_features, int* FeatureOutlineIndex) { diff --git a/api/capi.h b/api/capi.h index a0c54a20e4..be51454f78 100644 --- a/api/capi.h +++ b/api/capi.h @@ -285,8 +285,15 @@ TESS_API void TESS_CALL TessBaseAPIClearPersistentCache(TessBaseAPI* handle); TESS_API void TESS_CALL TessBaseAPISetProbabilityInContextFunc(TessBaseAPI* handle, TessProbabilityInContextFunc f); TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, TessFillLatticeFunc f); + +// Deprecated, no longer working TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results); +// Call TessDeleteText(*best_script_name) to free memory allocated by this function +TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, char** best_script_name, + int* best_orientation_deg, float* script_confidence, + float* orientation_confidence); + TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, int* num_features, 
int* FeatureOutlineIndex); From 66c03c91662ae04d782c51bc461b9b11c9d1f1b7 Mon Sep 17 00:00:00 2001 From: "James R. Barlow" Date: Thu, 8 Dec 2016 15:08:48 -0800 Subject: [PATCH 088/132] Revise after code review --- api/baseapi.cpp | 34 ++++++++++++++++++++++++---------- api/baseapi.h | 10 +++++----- api/capi.cpp | 28 ++++------------------------ api/capi.h | 5 ++--- 4 files changed, 35 insertions(+), 42 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 6522470771..582fbfbbc8 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1888,7 +1888,15 @@ char* TessBaseAPI::GetUNLVText() { return result; } -bool TessBaseAPI::DetectOrientationScript(int& orient_deg, float& orient_conf, std::string& script, float& script_conf) { +/** + * Detect the orientation of the input image and apparent script (alphabet). + * orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270) + * orient_conf is the confidence (15.0 is reasonably confident) + * script_name is an ASCII string, the name of the script, e.g. 
"Latin" + * script_conf is confidence level in the script + * Returns true on success and writes values to each parameter as an output + */ +bool TessBaseAPI::DetectOrientationScript(int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) { OSResults osr; bool osd = DetectOS(&osr); @@ -1898,15 +1906,21 @@ bool TessBaseAPI::DetectOrientationScript(int& orient_deg, float& orient_conf, s int orient_id = osr.best_result.orientation_id; int script_id = osr.get_best_script(orient_id); - orient_conf = osr.best_result.oconfidence; - script_conf = osr.best_result.sconfidence; - const char* script_name = + if (orient_conf) + *orient_conf = osr.best_result.oconfidence; + if (orient_deg) + *orient_deg = orient_id * 90; // convert quadrant to degrees + + if (script_name) { + const char* script = osr.unicharset->get_script_from_script_id(script_id); - // clockwise orientation of the input image, in degrees - orient_deg = orient_id * 90; + *script_name = script; + } - script = script_name; + if (script_conf) + *script_conf = osr.best_result.sconfidence; + return true; } @@ -1918,10 +1932,10 @@ bool TessBaseAPI::DetectOrientationScript(int& orient_deg, float& orient_conf, s char* TessBaseAPI::GetOsdText(int page_number) { int orient_deg; float orient_conf; - std::string script_name; + const char* script_name; float script_conf; - if (!DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf)) + if (!DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf)) return NULL; // clockwise rotation needed to make the page upright @@ -1936,7 +1950,7 @@ char* TessBaseAPI::GetOsdText(int page_number) { "Orientation confidence: %.2f\n" "Script: %s\n" "Script confidence: %.2f\n", - page_number, orient_deg, rotate, orient_conf, script_name.c_str(), + page_number, orient_deg, rotate, orient_conf, script_name, script_conf); return osd_buf; diff --git a/api/baseapi.h b/api/baseapi.h index 2e1a989acd..e70558fbc0 100644 --- 
a/api/baseapi.h +++ b/api/baseapi.h @@ -26,7 +26,6 @@ (patch)) #include -#include // To avoid collision with other typenames include the ABSOLUTE MINIMUM // complexity of includes here. Use forward declarations wherever possible // and hide includes of complex types in baseapi.cpp. @@ -621,12 +620,13 @@ class TESS_API TessBaseAPI { /** * Detect the orientation of the input image and apparent script (alphabet). - * orient_deg is the detected clockwise rotation of the input image - * orient_conf is the confidence (15.0 is reasonable) - * script is an ASCII string, the name of the script, e.g. "Latin" + * orient_deg is the detected clockwise rotation of the input image in degrees (0, 90, 180, 270) + * orient_conf is the confidence (15.0 is reasonably confident) + * script_name is an ASCII string, the name of the script, e.g. "Latin" * script_conf is confidence level in the script + * Returns true on success and writes values to each parameter as an output */ - bool DetectOrientationScript(int& orient_deg, float& orient_conf, std::string& script, float& script_conf); + bool DetectOrientationScript(int* orient_deg, float* orient_conf, const char** script_name, float* script_conf); /** * The recognized text is returned as a char* which is coded diff --git a/api/capi.cpp b/api/capi.cpp index 7e1fe3665a..57bed872df 100644 --- a/api/capi.cpp +++ b/api/capi.cpp @@ -541,32 +541,12 @@ TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* resu return FALSE; // Unsafe ABI, return FALSE always } -TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, char** best_script_name, - int* best_orientation_deg, float* script_confidence, - float* orientation_confidence) +TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, + int* orient_deg, float* orient_conf, const char** script_name, float* script_conf) { - int orient_deg; - float orient_conf; - std::string script_name; - float script_conf; - BOOL success; - + 
bool success; success = handle->DetectOrientationScript(orient_deg, orient_conf, script_name, script_conf); - if (!success) - return FALSE; - if (best_script_name) { - *best_script_name = new char [script_name.length() + 1]; - strcpy(*best_script_name, script_name.c_str()); - } - - if (best_orientation_deg) - *best_orientation_deg = orient_deg; - if (script_confidence) - *script_confidence = script_conf; - if (orientation_confidence) - *orientation_confidence = orient_conf; - - return TRUE; + return (BOOL)success; } diff --git a/api/capi.h b/api/capi.h index be51454f78..93d43f9483 100644 --- a/api/capi.h +++ b/api/capi.h @@ -290,9 +290,8 @@ TESS_API void TESS_CALL TessBaseAPISetFillLatticeFunc(TessBaseAPI* handle, Tess TESS_API BOOL TESS_CALL TessBaseAPIDetectOS(TessBaseAPI* handle, OSResults* results); // Call TessDeleteText(*best_script_name) to free memory allocated by this function -TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, char** best_script_name, - int* best_orientation_deg, float* script_confidence, - float* orientation_confidence); +TESS_API BOOL TESS_CALL TessBaseAPIDetectOrientationScript(TessBaseAPI* handle, + int* orient_deg, float* orient_conf, const char **script_name, float* script_conf); TESS_API void TESS_CALL TessBaseAPIGetFeaturesForBlob(TessBaseAPI* handle, TBLOB* blob, INT_FEATURE_STRUCT* int_features, int* num_features, int* FeatureOutlineIndex); From 23a08b0b800671f7e1782ab61e9cc4af17e333bd Mon Sep 17 00:00:00 2001 From: "alankila@bel.fi" Date: Fri, 9 Dec 2016 15:28:43 +0200 Subject: [PATCH 089/132] Remove unused code. Remove 'cutil/listio.cpp' and 'cutil/listio.h'. Also remove 'strsave' and 'strfree' defines from 'cutil/cutil.h'. 
--- cutil/Makefile.am | 4 +-- cutil/cutil.h | 20 -------------- cutil/listio.cpp | 67 ----------------------------------------------- cutil/listio.h | 43 ------------------------------ 4 files changed, 2 insertions(+), 132 deletions(-) delete mode 100644 cutil/listio.cpp delete mode 100644 cutil/listio.h diff --git a/cutil/Makefile.am b/cutil/Makefile.am index 15b339c8f3..5b0ffc6ebb 100644 --- a/cutil/Makefile.am +++ b/cutil/Makefile.am @@ -7,7 +7,7 @@ endif noinst_HEADERS = \ bitvec.h callcpp.h const.h cutil.h cutil_class.h danerror.h efio.h \ - emalloc.h freelist.h globals.h listio.h \ + emalloc.h freelist.h globals.h \ oldlist.h structures.h if !USING_MULTIPLELIBS @@ -22,7 +22,7 @@ endif libtesseract_cutil_la_SOURCES = \ bitvec.cpp callcpp.cpp cutil.cpp cutil_class.cpp danerror.cpp efio.cpp \ - emalloc.cpp freelist.cpp listio.cpp \ + emalloc.cpp freelist.cpp \ oldlist.cpp structures.cpp diff --git a/cutil/cutil.h b/cutil/cutil.h index 38b3ff9e39..42967093ab 100644 --- a/cutil/cutil.h +++ b/cutil/cutil.h @@ -92,26 +92,6 @@ typedef void (*void_dest) (void *); #define print_string(str) \ printf ("%s\n", str) -/********************************************************************** - * strfree - * - * Free the memory which was reserved by strsave. - **********************************************************************/ - -#define strfree(s) (free_string(s)) - -/********************************************************************** - * strsave - * - * Reserve a spot in memory for the string to be stored. Copy the string - * to it and return the result. - **********************************************************************/ - -#define strsave(s) \ - ((s) != NULL ? 
\ - ((char*) strcpy (alloc_string(strlen(s)+1), s)) : \ - (NULL)) - /*---------------------------------------------------------------------- F u n c t i o n s ----------------------------------------------------------------------*/ diff --git a/cutil/listio.cpp b/cutil/listio.cpp deleted file mode 100644 index 475088f5b5..0000000000 --- a/cutil/listio.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* -*-C-*- -################################################################################ -# -# File: listio.c -# Description: List I/O processing procedures. -# Author: Mark Seaman, Software Productivity -# Created: Thu Jul 23 13:24:09 1987 -# Modified: Fri May 17 17:33:30 1991 (Mark Seaman) marks@hpgrlt -# Language: C -# Package: N/A -# Status: Reusable Software Component -# -# (c) Copyright 1987, Hewlett-Packard Company. -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** http://www.apache.org/licenses/LICENSE-2.0 -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -# -################################################################################ - -This file contains the implementations of a set of general purpose -list I/O routines. For the interface definitions look in the file -"listio.h". 
----------------------------------------------------------------------------*/ - -#include -#include -#include -#include "listio.h" - -/*--------------------------------------------------------------------------- - Public Function Code ----------------------------------------------------------------------------*/ -/************************************************************************* - * R E A D L I S T - * - * Read a list of strings from a file. Return the string list to the - * caller. - *************************************************************************/ -LIST read_list(const char *filename) { - FILE *infile; - char s[CHARS_PER_LINE]; - LIST list; - - if ((infile = open_file (filename, "r")) == NULL) - return (NIL_LIST); - - list = NIL_LIST; - while (fgets (s, CHARS_PER_LINE, infile) != NULL) { - s[CHARS_PER_LINE - 1] = '\0'; - if (strlen (s) > 0) { - if (s[strlen (s) - 1] == '\n') - s[strlen (s) - 1] = '\0'; - if (strlen (s) > 0) { - list = push (list, (LIST) strsave (s)); - } - } - } - - fclose(infile); - return (reverse_d (list)); -} diff --git a/cutil/listio.h b/cutil/listio.h deleted file mode 100644 index 7d9c19f777..0000000000 --- a/cutil/listio.h +++ /dev/null @@ -1,43 +0,0 @@ -/* -*-C-*- -################################################################################ -# -# File: listio.h -# Description: List I/O processing procedures. -# Author: Mark Seaman, Software Productivity -# Created: Thu Jul 23 13:24:09 1987 -# Modified: Mon Oct 16 11:38:52 1989 (Mark Seaman) marks@hpgrlt -# Language: C -# Package: N/A -# Status: Reusable Software Component -# -# (c) Copyright 1987, Hewlett-Packard Company. -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. 
-** You may obtain a copy of the License at -** http://www.apache.org/licenses/LICENSE-2.0 -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -# -################################################################################ - * Revision 1.5 89/06/27 11:56:00 11:56:00 marks (Mark Seaman) - * Fixed MAC_OR_DOS bug - * - - This file contains the interface definitions to a set of general purpose - list I/O routines. - -***********************************************************************/ -#ifndef LISTIO_H -#define LISTIO_H - -#include -#include "oldlist.h" - -/*---------------------------------------------------------------------------- - Public Function Prototypes ---------------------------------------------------------------------------*/ -LIST read_list(const char *filename); -#endif From 2452051b96195851bb48f91d53b953015645c07f Mon Sep 17 00:00:00 2001 From: amitdo Date: Fri, 9 Dec 2016 16:19:02 +0200 Subject: [PATCH 090/132] Remove 'listio.cpp' and 'listio.h' from vs2010 vcxproj --- vs2010/libtesseract/libtesseract.vcxproj | 4 +--- vs2010/libtesseract/libtesseract.vcxproj.filters | 8 +------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/vs2010/libtesseract/libtesseract.vcxproj b/vs2010/libtesseract/libtesseract.vcxproj index 90edfffa90..10762fe92e 100644 --- a/vs2010/libtesseract/libtesseract.vcxproj +++ b/vs2010/libtesseract/libtesseract.vcxproj @@ -448,7 +448,6 @@ copy "$(TargetDir)$(TargetName).lib" ..\..\..\lib - @@ -716,7 +715,6 @@ copy "$(TargetDir)$(TargetName).lib" ..\..\..\lib - @@ -867,4 +865,4 @@ copy "$(TargetDir)$(TargetName).lib" ..\..\..\lib - \ No newline at end of file + diff --git a/vs2010/libtesseract/libtesseract.vcxproj.filters 
b/vs2010/libtesseract/libtesseract.vcxproj.filters index 911f3f7b9f..aa18a673e2 100644 --- a/vs2010/libtesseract/libtesseract.vcxproj.filters +++ b/vs2010/libtesseract/libtesseract.vcxproj.filters @@ -361,9 +361,6 @@ Source Files - - Source Files - Source Files @@ -1170,9 +1167,6 @@ Header Files - - Header Files - Header Files @@ -1610,4 +1604,4 @@ Resource Files - \ No newline at end of file + From 71269e79a58bef550c34c618e4c1f5af204b0f14 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sat, 10 Dec 2016 09:19:06 +0100 Subject: [PATCH 091/132] Fix two typos in comments Signed-off-by: Stefan Weil --- ccstruct/rejctmap.cpp | 2 +- neural_networks/runtime/input_file_buffer.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ccstruct/rejctmap.cpp b/ccstruct/rejctmap.cpp index 9c9ff2e503..a2910675a8 100644 --- a/ccstruct/rejctmap.cpp +++ b/ccstruct/rejctmap.cpp @@ -267,7 +267,7 @@ void REJ::full_print(FILE *fp) { //The REJMAP class has been hacked to use alloc_struct instead of new []. //This is to reduce memory fragmentation only as it is rather kludgy. -//alloc_struct by-passes the call to the contsructor of REJ on each +//alloc_struct by-passes the call to the constructor of REJ on each //array element. Although the constructor is empty, the BITS16 members //do have a constructor which sets all the flags to 0. The memset //replaces this functionality. 
diff --git a/neural_networks/runtime/input_file_buffer.cpp b/neural_networks/runtime/input_file_buffer.cpp index 0d88bec3a7..2ab6d1b341 100644 --- a/neural_networks/runtime/input_file_buffer.cpp +++ b/neural_networks/runtime/input_file_buffer.cpp @@ -18,7 +18,7 @@ #include "input_file_buffer.h" namespace tesseract { -// default and only contsructor +// default and only constructor InputFileBuffer::InputFileBuffer(const string &file_name) : file_name_(file_name) { fp_ = NULL; From d045aaa31a4bf86609b4b555562a0da95c5defc2 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 11 Dec 2016 22:04:17 +0100 Subject: [PATCH 092/132] java: Improve build rules * Fix builds in subdirectories: * Add srcdir to Manifest.txt. * Remove srcdir from piccolo2d-core-3.0.jar and piccolo2d-extras-3.0.jar. * Add dependency of SCROLLVIEW_CLASSES on SCROLLVIEW_LIBS. The SCROLLVIEW_LIBS are now automatically fetched when needed. * Add .PHONY target for fetch-jars. * Improve rule for clean target. Signed-off-by: Stefan Weil --- java/Makefile.am | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/java/Makefile.am b/java/Makefile.am index fddbc6f9ec..af3b1885c3 100644 --- a/java/Makefile.am +++ b/java/Makefile.am @@ -36,19 +36,20 @@ SCROLLVIEW_CLASSES = \ com/google/scrollview/ScrollView.class SCROLLVIEW_LIBS = \ - $(srcdir)/piccolo2d-core-3.0.jar \ - $(srcdir)/piccolo2d-extras-3.0.jar + piccolo2d-core-3.0.jar \ + piccolo2d-extras-3.0.jar -CLASSPATH = $(srcdir)/piccolo2d-core-3.0.jar:$(srcdir)/piccolo2d-extras-3.0.jar +CLASSPATH = piccolo2d-core-3.0.jar:piccolo2d-extras-3.0.jar ScrollView.jar : $(SCROLLVIEW_CLASSES) - $(JAR) cfm $@ Manifest.txt com/google/scrollview/*.class \ + $(JAR) cfm $@ $(srcdir)/Manifest.txt com/google/scrollview/*.class \ com/google/scrollview/events/*.class com/google/scrollview/ui/*.class -$(SCROLLVIEW_CLASSES) : $(SCROLLVIEW_FILES) +$(SCROLLVIEW_CLASSES) : $(SCROLLVIEW_FILES) $(SCROLLVIEW_LIBS) $(JAVAC) -encoding UTF8 -sourcepath 
$(srcdir) -classpath $(CLASSPATH) $(SCROLLVIEW_FILES) -d $(builddir) -fetch-jars : +.PHONY: fetch-jars +fetch-jars $(SCROLLVIEW_LIBS): curl -L http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-core/3.0/piccolo2d-core-3.0.jar > piccolo2d-core-3.0.jar curl -L http://search.maven.org/remotecontent?filepath=org/piccolo2d/piccolo2d-extras/3.0/piccolo2d-extras-3.0.jar > piccolo2d-extras-3.0.jar @@ -64,7 +65,7 @@ uninstall: endif clean : - rm -f ScrollView.jar *.class $(srcdir)/*.class + rm -f ScrollView.jar $(SCROLLVIEW_CLASSES) # all-am does nothing, to make the java part optional. all all-am install : From 8af3629e9fef121ffca70f19120eb795b00383b3 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 11 Dec 2016 22:43:37 +0100 Subject: [PATCH 093/132] openmp: Fix OpenMP support * Add OPENMP_CXXFLAGS for ccmain. * Replace OPENMP_CFLAGS by OPENMP_CXXFLAGS. * Always use _OPENMP for conditional compilation. * Remove OPENMP as there is already _OPENMP. * Include omp.h conditionally. 
Signed-off-by: Stefan Weil --- api/Makefile.am | 5 +---- ccmain/Makefile.am | 1 + ccmain/par_control.cpp | 6 ++++-- configure.ac | 7 ------- 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/api/Makefile.am b/api/Makefile.am index 9d20919b2e..3bca53ba86 100644 --- a/api/Makefile.am +++ b/api/Makefile.am @@ -81,9 +81,7 @@ tesseract_LDADD = libtesseract.la tesseract_LDFLAGS = $(OPENCL_LDFLAGS) -if OPENMP -tesseract_LDADD += $(OPENMP_CFLAGS) -endif +tesseract_LDADD += $(OPENMP_CXXFLAGS) if T_WIN tesseract_LDADD += -lws2_32 @@ -92,4 +90,3 @@ endif if ADD_RT tesseract_LDADD += -lrt endif - diff --git a/ccmain/Makefile.am b/ccmain/Makefile.am index e82c0031a1..ac6cddcf38 100644 --- a/ccmain/Makefile.am +++ b/ccmain/Makefile.am @@ -7,6 +7,7 @@ AM_CPPFLAGS += \ -I$(top_srcdir)/textord -I$(top_srcdir)/opencl AM_CPPFLAGS += $(OPENCL_CPPFLAGS) +AM_CPPFLAGS += $(OPENMP_CXXFLAGS) if VISIBILITY AM_CPPFLAGS += -DTESS_EXPORTS \ diff --git a/ccmain/par_control.cpp b/ccmain/par_control.cpp index 7a7d0415d6..82cd55c9a0 100644 --- a/ccmain/par_control.cpp +++ b/ccmain/par_control.cpp @@ -18,9 +18,9 @@ /////////////////////////////////////////////////////////////////////// #include "tesseractclass.h" -#ifdef OPENMP +#ifdef _OPENMP #include -#endif // OPENMP +#endif // _OPENMP namespace tesseract { @@ -53,7 +53,9 @@ void Tesseract::PrerecAllWordsPar(const GenericVector& words) { } // Pre-classify all the blobs. 
if (tessedit_parallelize > 1) { +#ifdef _OPENMP #pragma omp parallel for num_threads(10) +#endif // _OPENMP for (int b = 0; b < blobs.size(); ++b) { *blobs[b].choices = blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL); diff --git a/configure.ac b/configure.ac index 2e8883d05e..be08e538e4 100644 --- a/configure.ac +++ b/configure.ac @@ -171,14 +171,7 @@ if test "$enable_embedded" = "yes"; then fi # check whether to build OpenMP support -AM_CONDITIONAL([OPENMP], false) AC_OPENMP -AS_IF([test "x$OPENMP_CFLAGS" != "x"], - [AM_CONDITIONAL([OPENMP], true) - AM_CPPFLAGS="$OPENMP_CXXFLAGS $AM_CPPFLAGS" - AC_DEFINE([OPENMP], [], [Defined when compiled with OpenMP support])] -) - # check whether to build opencl version AC_MSG_CHECKING([--enable-opencl argument]) From 7f4831bd9b589f06e13f8a33acbb4af5d7409ad4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 23 Dec 2016 10:37:44 +0100 Subject: [PATCH 094/132] increase min autoconf version (2.59) --- configure.ac | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/configure.ac b/configure.ac index be08e538e4..86cb2e7b8c 100644 --- a/configure.ac +++ b/configure.ac @@ -5,7 +5,7 @@ # ---------------------------------------- # Initialization # ---------------------------------------- -AC_PREREQ([2.50]) +AC_PREREQ([2.59]) AC_INIT([tesseract], [3.05.00dev], [https://github.com/tesseract-ocr/tesseract/issues]) AC_PROG_CXX([g++ clang++]) AC_LANG([C++]) @@ -118,7 +118,7 @@ esac includedir="${includedir}/tesseract" AC_ARG_WITH([extra-includes], - [AC_HELP_STRING([--with-extra-includes=DIR], + [AS_HELP_STRING([--with-extra-includes=DIR], [Define an additional directory for include files])], [if test -d "$withval" ; then CFLAGS="$CFLAGS -I$withval" @@ -127,7 +127,7 @@ AC_ARG_WITH([extra-includes], fi]) AC_ARG_WITH([extra-libraries], - [AC_HELP_STRING([--with-extra-libraries=DIR], + [AS_HELP_STRING([--with-extra-libraries=DIR], [Define an additional 
directory for library files])], [if test -d "$withval" ; then LDFLAGS="$LDFLAGS -L$withval" @@ -137,8 +137,8 @@ AC_ARG_WITH([extra-libraries], AC_MSG_CHECKING([--enable-graphics argument]) AC_ARG_ENABLE([graphics], - [AC_HELP_STRING([--enable-graphics],[enable graphics (ScrollView) (default)]) -AC_HELP_STRING([--disable-graphics],[disable graphics (ScrollView)])], + [AS_HELP_STRING([--enable-graphics],[enable graphics (ScrollView) (default)]) +AS_HELP_STRING([--disable-graphics],[disable graphics (ScrollView)])], [enable_graphics=$enableval], [enable_graphics="yes"]) AC_MSG_RESULT([$enable_graphics]) @@ -150,7 +150,7 @@ fi # Check if cube should be disabled AC_MSG_CHECKING([whether to disable cube]) AC_ARG_ENABLE([cube], - [AC_HELP_STRING([--disable-cube], [don't build cube support (experimental)])], + [AS_HELP_STRING([--disable-cube], [don't build cube support (experimental)])], [disable_cube="yes"], [disable_cube="no"]) AC_MSG_RESULT([$disable_cube]) AM_CONDITIONAL([NO_CUBE_BUILD], [test "$disable_cube" = "yes"]) @@ -255,7 +255,7 @@ AC_SUBST([OPENCL_LDFLAGS]) # http://groups.google.com/group/tesseract-dev/browse_thread/thread/976645ae98189127 AC_MSG_CHECKING([--enable-visibility argument]) AC_ARG_ENABLE([visibility], - [AC_HELP_STRING([--enable-visibility],[enable experimental build with fvisibility (default=no)])], + [AS_HELP_STRING([--enable-visibility],[enable experimental build with fvisibility (default=no)])], [enable_visibility=$enableval], [enable_visibility="no"]) AC_MSG_RESULT([$enable_visibility]) @@ -264,7 +264,7 @@ AM_CONDITIONAL([VISIBILITY], [test "$enable_visibility" = "yes"]) # check whether to build multiple libraries AC_MSG_CHECKING([--enable-multiple-libraries argument]) AC_ARG_ENABLE([multiple-libraries], - [AC_HELP_STRING([--enable-multiple-libraries],[enable multiple libraries (default=no)])], + [AS_HELP_STRING([--enable-multiple-libraries],[enable multiple libraries (default=no)])], [enable_mlibs=$enableval], [enable_mlibs="no"]) 
AC_MSG_RESULT([$enable_mlibs]) @@ -273,7 +273,7 @@ AM_CONDITIONAL([USING_MULTIPLELIBS], [test "$enable_mlibs" = "yes"]) # Check if tessdata-prefix is disabled AC_MSG_CHECKING([whether to use tessdata-prefix]) AC_ARG_ENABLE([tessdata-prefix], - [AC_HELP_STRING([--disable-tessdata-prefix], + [AS_HELP_STRING([--disable-tessdata-prefix], [don't set TESSDATA-PREFIX during compile])], [tessdata_prefix="no"], [tessdata_prefix="yes"]) AC_MSG_RESULT([$tessdata_prefix]) @@ -282,7 +282,7 @@ AM_CONDITIONAL([NO_TESSDATA_PREFIX], [test "$tessdata_prefix" = "no"]) # Check whether enable debuging AC_MSG_CHECKING([whether to enable debugging]) AC_ARG_ENABLE([debug], - [AC_HELP_STRING([--enable-debug], + [AS_HELP_STRING([--enable-debug], [turn on debugging (default=no)])], [debug=$enableval], [debug="no"]) From 092c7d56909e0f3294079fff186393ead5586c4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Mon, 26 Dec 2016 12:10:31 +0100 Subject: [PATCH 095/132] require leptonica 1.74 or higher --- configure.ac | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/configure.ac b/configure.ac index 86cb2e7b8c..89667f9337 100644 --- a/configure.ac +++ b/configure.ac @@ -433,24 +433,16 @@ do done done -if test "$have_lept" = yes ; then - AC_MSG_RESULT(yes) - AC_CHECK_LIB([lept], [l_generateCIDataForPdf], [], - [AC_MSG_ERROR([leptonica library with pdf support (>= 1.71) is missing])]) -else - AC_MSG_ERROR([leptonica not found]) -fi - -AC_MSG_CHECKING([leptonica headers version >= 1.71]) +AC_MSG_CHECKING([leptonica headers version >= 1.74]) AC_PREPROC_IFELSE( [AC_LANG_PROGRAM([#include "allheaders.h"], -[#if (LIBLEPT_MAJOR_VERSION >= 1) && (LIBLEPT_MINOR_VERSION >= 71) +[#if (LIBLEPT_MAJOR_VERSION >= 1) && (LIBLEPT_MINOR_VERSION >= 74) int i = 0; #else #error You need to upgrade your leptonica library! 
#endif])], [AC_MSG_RESULT(yes)], - [AC_MSG_FAILURE([leptonica 1.71 or higher is required])]) + [AC_MSG_FAILURE([leptonica 1.74 or higher is required])]) AM_CONDITIONAL([ENABLE_TRAINING], true) From 245eebdf293ac19f1fb85c36e51daaaa1b5e5a3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Wed, 7 Dec 2016 21:40:51 +0100 Subject: [PATCH 096/132] Multi-page TIFF buffering is broken - fix #233 --- api/baseapi.cpp | 6 ++++-- classify/mastertrainer.cpp | 6 +++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 582fbfbbc8..4104b93d94 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1028,6 +1028,7 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, OpenclDevice od; #endif // USE_OPENCL int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0; + size_t offset = 0; for (; ; ++page) { if (tessedit_page_number >= 0) page = tessedit_page_number; @@ -1039,8 +1040,8 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, } else { #endif // USE_OPENCL pix = (data) ? - pixReadMemTiff(data, size, page) : - pixReadTiff(filename, page); + pixReadMemFromMultipageTiff(data, size, &offset) : + pixReadFromMultipageTiff(filename, &offset); #ifdef USE_OPENCL } #endif // USE_OPENCL @@ -1054,6 +1055,7 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, pixDestroy(&pix); if (!r) return false; if (tessedit_page_number >= 0) break; + if (!offset) break; } return true; #else diff --git a/classify/mastertrainer.cpp b/classify/mastertrainer.cpp index cd7e93b9f2..849fb06010 100644 --- a/classify/mastertrainer.cpp +++ b/classify/mastertrainer.cpp @@ -214,10 +214,14 @@ void MasterTrainer::AddSample(bool verification, const char* unichar, // Must be called after ReadTrainingSamples, as the current number of images // is used as an offset for page numbers in the samples. 
void MasterTrainer::LoadPageImages(const char* filename) { + size_t offset = 0; int page; Pix* pix; - for (page = 0; (pix = pixReadTiff(filename, page)) != NULL; ++page) { + for (page = 0; ; page++) { + pix = pixReadFromMultipageTiff(filename, &offset); + if (!pix) break; page_images_.push_back(pix); + if (!offset) break; } tprintf("Loaded %d page images from %s\n", page, filename); } From 0889940763d50be2fc01cb62524d1f591ca61e24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Mon, 26 Dec 2016 13:31:47 +0100 Subject: [PATCH 097/132] fix removal of AC_CHECK_LIB([lept]) --- configure.ac | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/configure.ac b/configure.ac index 89667f9337..a387e8ec55 100644 --- a/configure.ac +++ b/configure.ac @@ -433,6 +433,14 @@ do done done +if test "$have_lept" = yes ; then + AC_MSG_RESULT(yes) + AC_CHECK_LIB([lept], [l_generateCIDataForPdf], [], + [AC_MSG_ERROR([leptonica library with pdf support (>= 1.71) is missing])]) +else + AC_MSG_ERROR([leptonica not found]) +fi + AC_MSG_CHECKING([leptonica headers version >= 1.74]) AC_PREPROC_IFELSE( [AC_LANG_PROGRAM([#include "allheaders.h"], From 3df54a43181cf5eaa126532af49a6744a5115fed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Mon, 26 Dec 2016 13:44:20 +0100 Subject: [PATCH 098/132] remove (fake) OPENMP support --- api/Makefile.am | 2 -- ccmain/Makefile.am | 1 - ccmain/par_control.cpp | 6 ------ configure.ac | 3 --- 4 files changed, 12 deletions(-) diff --git a/api/Makefile.am b/api/Makefile.am index 3bca53ba86..d8c1e54cda 100644 --- a/api/Makefile.am +++ b/api/Makefile.am @@ -81,8 +81,6 @@ tesseract_LDADD = libtesseract.la tesseract_LDFLAGS = $(OPENCL_LDFLAGS) -tesseract_LDADD += $(OPENMP_CXXFLAGS) - if T_WIN tesseract_LDADD += -lws2_32 libtesseract_la_LDFLAGS += -no-undefined -Wl,--as-needed -lws2_32 diff --git a/ccmain/Makefile.am b/ccmain/Makefile.am index ac6cddcf38..e82c0031a1 100644 --- a/ccmain/Makefile.am +++ 
b/ccmain/Makefile.am @@ -7,7 +7,6 @@ AM_CPPFLAGS += \ -I$(top_srcdir)/textord -I$(top_srcdir)/opencl AM_CPPFLAGS += $(OPENCL_CPPFLAGS) -AM_CPPFLAGS += $(OPENMP_CXXFLAGS) if VISIBILITY AM_CPPFLAGS += -DTESS_EXPORTS \ diff --git a/ccmain/par_control.cpp b/ccmain/par_control.cpp index 82cd55c9a0..6797a5a2a7 100644 --- a/ccmain/par_control.cpp +++ b/ccmain/par_control.cpp @@ -18,9 +18,6 @@ /////////////////////////////////////////////////////////////////////// #include "tesseractclass.h" -#ifdef _OPENMP -#include -#endif // _OPENMP namespace tesseract { @@ -53,9 +50,6 @@ void Tesseract::PrerecAllWordsPar(const GenericVector& words) { } // Pre-classify all the blobs. if (tessedit_parallelize > 1) { -#ifdef _OPENMP - #pragma omp parallel for num_threads(10) -#endif // _OPENMP for (int b = 0; b < blobs.size(); ++b) { *blobs[b].choices = blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL); diff --git a/configure.ac b/configure.ac index a387e8ec55..0f1198402c 100644 --- a/configure.ac +++ b/configure.ac @@ -170,9 +170,6 @@ if test "$enable_embedded" = "yes"; then AM_CPPFLAGS="-DEMBEDDED $AM_CPPFLAGS" fi -# check whether to build OpenMP support -AC_OPENMP - # check whether to build opencl version AC_MSG_CHECKING([--enable-opencl argument]) AC_ARG_ENABLE([opencl], From 73890c38bb522b0b3312c070a29c8db8ea25efab Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Fri, 6 Jan 2017 17:19:58 +0300 Subject: [PATCH 099/132] Update cppan.yml --- cppan.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cppan.yml b/cppan.yml index 8065da79e4..b4247c0821 100644 --- a/cppan.yml +++ b/cppan.yml @@ -121,4 +121,4 @@ dependencies: private: # tesseract uses leptonica only internally # and does not expose its interface to users - pvt.cppan.demo.leptonica: master + pvt.cppan.demo.danbloomberg.leptonica: 1.73 From aaf49f7a627c580d58544c249aae0b892ca23977 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 6 Jan 2017 19:52:31 +0100 Subject: 
[PATCH 100/132] leptonica 1.74.1 is needed for cppan --- cppan.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cppan.yml b/cppan.yml index b4247c0821..71820ca0ba 100644 --- a/cppan.yml +++ b/cppan.yml @@ -121,4 +121,4 @@ dependencies: private: # tesseract uses leptonica only internally # and does not expose its interface to users - pvt.cppan.demo.danbloomberg.leptonica: 1.73 + pvt.cppan.demo.danbloomberg.leptonica: 1.74.1 From d500231f0663d2518c48db5e49553bade94a8f4f Mon Sep 17 00:00:00 2001 From: Jeff Breidenbach Date: Thu, 19 Jan 2017 15:18:52 +0100 Subject: [PATCH 101/132] fix #665 process file list --- api/baseapi.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 4104b93d94..75b5cb393f 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1129,7 +1129,15 @@ bool TessBaseAPI::ProcessPagesInternal(const char* filename, // Maybe we have a filelist if (r != 0 || format == IFF_UNKNOWN) { - STRING s(buf.c_str()); + STRING s; + if (stdInput) { + s = buf.c_str(); + } else { + std::ifstream t(filename); + std::string u((std::istreambuf_iterator(t)), + std::istreambuf_iterator()); + s = u.c_str(); + } return ProcessPagesFileList(NULL, &s, retry_config, timeout_millisec, renderer, tesseract_->tessedit_page_number); From 1999aa17cb390f9ac8843bde7c41ba75deabb34c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Thu, 19 Jan 2017 15:55:07 +0100 Subject: [PATCH 102/132] fix appveyor --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 020331c461..7dc013d30e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -13,7 +13,7 @@ before_build: - if %platform%==Win32 set vcplatform=Win32 - if %platform%==Win64 set vcplatform=x64 - - curl -fsS -o cppan.zip https://cppan.org/client/cppan-master-Windows-client.zip + - curl -fsS -L -o cppan.zip https://cppan.org/client/cppan-master-Windows-client.zip - 7z x cppan.zip 
- set PATH=%PATH%;%cd% From 45fd3ede2104598cff3eb62000a9dc18346e2d80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Mon, 13 Feb 2017 20:09:14 +0100 Subject: [PATCH 103/132] fix #712: Ghostscript mangling Tesseract-produced PDFs --- tessdata/pdf.ttf | Bin 572 -> 572 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tessdata/pdf.ttf b/tessdata/pdf.ttf index 8affa23180a49f38ab66b095fabce1c299a7ee93..d1472b20ef1aebbf5e11573867e9ac13873681b9 100644 GIT binary patch delta 166 zcmdnPvWI1YqBj?V2?GP;1O^5Mx%8aMG*Kl-6(D~HkS~#unwY{}@i7(1UIWBt89)IZ zCKe_j`w0-MWaO4qumBlA^&LQhV`7en%!Vn20`dGdUs>2Wz%u7&{mBC{t{h(eY2pPr hDTsO?$q1H45KNOf7+V-wCigN*v2#I$C$D4d0|0CeAf5mK delta 166 zcmdnPvWI1YqBl2#83P031O^5Mh4h@tG*Kl-6(D~HkS~#unwY|6lz9-yUIWBt89)IZ zraz28_7fmh$;d6K0MbCf*a0NiCgym^?3SGn7SC_y7N8ao3kfhz=3s1LWSHE`D8 Date: Mon, 13 Feb 2017 22:47:02 +0300 Subject: [PATCH 104/132] Backport cppan fixes. --- .gitignore | 5 +- ccutil/unicharset.h | 2 +- classify/featdefs.h | 2 +- classify/picofeat.h | 2 +- cppan.yml | 364 +++++++++++++++++++++++++++++--------------- 5 files changed, 248 insertions(+), 127 deletions(-) diff --git a/.gitignore b/.gitignore index 1a43bfe9d3..022cc693c9 100644 --- a/.gitignore +++ b/.gitignore @@ -75,4 +75,7 @@ kernel*.bin # build dirs /build* /cppan -/win* \ No newline at end of file +/win* +*.dll +*.exe +*.lnk diff --git a/ccutil/unicharset.h b/ccutil/unicharset.h index 023e84d5b6..535a274ac7 100644 --- a/ccutil/unicharset.h +++ b/ccutil/unicharset.h @@ -141,7 +141,7 @@ class UNICHARSET { // Custom list of characters and their ligature forms (UTF8) // These map to unicode values in the private use area (PUC) and are supported // by only few font families (eg. Wyld, Adobe Caslon Pro). - static const char* kCustomLigatures[][2]; + static TESS_API const char* kCustomLigatures[][2]; // List of strings for the SpecialUnicharCodes. Keep in sync with the enum. 
static const char* kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT]; diff --git a/classify/featdefs.h b/classify/featdefs.h index 704bbdfde2..7c168f3daa 100644 --- a/classify/featdefs.h +++ b/classify/featdefs.h @@ -77,7 +77,7 @@ int ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ extern const FEATURE_DESC_STRUCT MicroFeatureDesc; -extern const FEATURE_DESC_STRUCT PicoFeatDesc; +extern TESS_API const FEATURE_DESC_STRUCT PicoFeatDesc; extern const FEATURE_DESC_STRUCT CharNormDesc; extern const FEATURE_DESC_STRUCT OutlineFeatDesc; extern const FEATURE_DESC_STRUCT IntFeatDesc; diff --git a/classify/picofeat.h b/classify/picofeat.h index 208b7e7708..966ffc32e7 100644 --- a/classify/picofeat.h +++ b/classify/picofeat.h @@ -61,5 +61,5 @@ extern double_VAR_H(classify_pico_feature_length, 0.05, "Pico Feature Length"); /**---------------------------------------------------------------------------- Global Data Definitions and Declarations ----------------------------------------------------------------------------**/ -extern FLOAT32 PicoFeatureLength; +extern TESS_API FLOAT32 PicoFeatureLength; #endif diff --git a/cppan.yml b/cppan.yml index 71820ca0ba..8851c26887 100644 --- a/cppan.yml +++ b/cppan.yml @@ -1,124 +1,242 @@ local_settings: - cppan_dir: cppan - -files: - - api/.*\.cpp - - ccmain/.*\.cpp - - ccstruct/.*\.cpp - - ccutil/.*\.cpp - - classify/.*\.cpp - - cube/.*\.cpp - - cutil/.*\.cpp - - dict/.*\.cpp - - neural_networks/runtime/.*\.cpp - - opencl/.*\.cpp - - textord/.*\.cpp - - viewer/.*\.cpp - - wordrec/.*\.cpp - - - api/.*\.h - - ccmain/.*\.h - - ccstruct/.*\.h - - ccutil/.*\.h - - classify/.*\.h - - cube/.*\.h - - cutil/.*\.h - - dict/.*\.h - - neural_networks/runtime/.*\.h - - opencl/.*\.h - - textord/.*\.h - - viewer/.*\.h - - wordrec/.*\.h - - - vs2010/port/.* - -include_directories: - private: - - classify - - cube - - 
cutil - - dict - - neural_networks/runtime - - opencl - - textord - - vs2010/port - - viewer - - wordrec - public: - - api - - ccmain - - ccstruct - - ccutil - -check_function_exists: - - getline - -check_symbol_exists: - snprintf: stdio.h - -check_include_exists: - - dlfcn.h - - inttypes.h - - limits.h - - malloc.h - - memory.h - - stdbool.h - - stdint.h - - stdlib.h - - strings.h - - string.h - - sys/ipc.h - - sys/shm.h - - sys/stat.h - - sys/types.h - - sys/wait.h - - tiffio.h - - unistd.h - - cairo/cairo-version.h - - CL/cl.h - - OpenCL/cl.h - - pango-1.0/pango/pango-features.h - - unicode/uchar.h - -check_type_size: - - long long int - - off_t - - mbstate_t - - wchar_t - - _Bool - -pre_sources: | - # dummy config file - if (NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/config_auto.h) - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/config_auto.h) - endif() - -post_sources: | - if (NOT WIN32) - list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/gettimeofday.cpp") - list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/gettimeofday.h") - list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/mathfix.h") - list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/strcasestr.cpp") - list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/strcasestr.h") - list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/strtok_r.cpp") - list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/strtok_r.h") - list(REMOVE_ITEM src "${CMAKE_CURRENT_SOURCE_DIR}/vs2010/port/vcsversion.h") - endif() - -options: - any: - definitions: - public: - - HAVE_CONFIG_H - - _SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS=1 - - USE_STD_NAMESPACE=1 - - WINDLLNAME="tesseract" - shared: - definitions: - public: TESS_EXPORTS - -dependencies: - private: - # tesseract uses leptonica only internally - # and does not expose its interface to users - pvt.cppan.demo.danbloomberg.leptonica: 1.74.1 + #use_shared_libs: true + #generator: Visual Studio 14 2015 Win64 + silent: false 
+ #copy_import_libs: true + build: + c_flags: /W0 + cxx_flags: /W0 + + dependencies: + pvt.cppan.demo.danbloomberg.leptonica: 1 + pvt.cppan.demo.unicode.icu.i18n: "*" + +root_project: pvt.cppan.demo.google.tesseract + +common_settings: + c++: 11 + +projects: + tesseract: + type: lib + export_all_symbols: true + files: + - api/.*\.cpp + - ccmain/.*\.cpp + - ccstruct/.*\.cpp + - ccutil/.*\.cpp + - classify/.*\.cpp + - cube/.*\.cpp + - cutil/.*\.cpp + - dict/.*\.cpp + - neural_networks/runtime/.*\.cpp + - opencl/.*\.cpp + - textord/.*\.cpp + - viewer/.*\.cpp + - wordrec/.*\.cpp + + - api/.*\.h + - ccmain/.*\.h + - ccstruct/.*\.h + - ccutil/.*\.h + - classify/.*\.h + - cube/.*\.h + - cutil/.*\.h + - dict/.*\.h + - neural_networks/runtime/.*\.h + - opencl/.*\.h + - textord/.*\.h + - viewer/.*\.h + - wordrec/.*\.h + + - vs2010/port/.* + + exclude_from_build: + - api/tesseractmain.cpp + - viewer/svpaint.cpp + + include_directories: + public: + #private: + - classify + - cube + - cutil + - dict + - neural_networks/runtime + - opencl + - textord + - vs2010/port + - viewer + - wordrec + #public: + - api + - ccmain + - ccstruct + - ccutil + + check_function_exists: + - getline + + check_symbol_exists: + snprintf: stdio.h + + check_include_exists: + - dlfcn.h + - inttypes.h + - limits.h + - malloc.h + - memory.h + - stdbool.h + - stdint.h + - stdlib.h + - strings.h + - string.h + - sys/ipc.h + - sys/shm.h + - sys/stat.h + - sys/types.h + - sys/wait.h + - tiffio.h + - unistd.h + - cairo/cairo-version.h + - CL/cl.h + - OpenCL/cl.h + - pango-1.0/pango/pango-features.h + - unicode/uchar.h + + check_type_size: + - long long int + - off_t + - mbstate_t + - wchar_t + - _Bool + + + post_sources: | + file_write_once(${BDIR}/config_auto.h "") + if (NOT WIN32) + remove_src_dir(vs2010/port/*) + endif() + + options: + any: + definitions: + public: + - HAVE_CONFIG_H + - _SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS=1 + - USE_STD_NAMESPACE=1 + - WINDLLNAME="tesseract" + shared: + definitions: + 
private: + - TESS_EXPORTS + interface: + - TESS_IMPORTS + + dependencies: + pvt.cppan.demo.danbloomberg.leptonica: 1.74 + + tesseractmain: + files: api/tesseractmain.cpp + dependencies: + - tesseract + + tessopt: + type: lib + static_only: true + files: training/tessopt.* + include_directories: training + dependencies: tesseract + + common_training: + type: lib + static_only: true + files: + - training/commandlineflags.cpp + - training/commandlineflags.h + - training/commontraining.cpp + - training/commontraining.h + include_directories: training + dependencies: + - tessopt + + ambiguous_words: + files: training/ambiguous_words.cpp + dependencies: + - tesseract + + classifier_tester: + files: training/classifier_tester.cpp + dependencies: common_training + + combine_tessdata: + files: training/combine_tessdata.cpp + dependencies: tesseract + + cntraining: + files: training/cntraining.cpp + dependencies: common_training + + dawg2wordlist: + files: training/dawg2wordlist.cpp + dependencies: tesseract + + mftraining: + files: + - training/mftraining.cpp + - training/mergenf.* + dependencies: common_training + + shapeclustering: + files: training/shapeclustering.cpp + dependencies: common_training + + unicharset_extractor: + files: training/unicharset_extractor.cpp + dependencies: tessopt + + wordlist2dawg: + files: training/wordlist2dawg.cpp + dependencies: tesseract + + unicharset_training: + type: lib + static_only: true + files: + - training/fileio.* + - training/icuerrorcode.h + - training/lstmtester.* + - training/normstrngs.* + - training/unicharset_training_utils.* + include_directories: training + dependencies: + - common_training + - pvt.cppan.demo.unicode.icu.i18n + + set_unicharset_properties: + files: training/set_unicharset_properties.cpp + dependencies: unicharset_training + + text2image: + files: + - training/text2image.cpp + - training/boxchar.cpp + - training/boxchar.h + - training/degradeimage.cpp + - training/degradeimage.h + - 
training/ligature_table.cpp + - training/ligature_table.h + - training/normstrngs.cpp + - training/normstrngs.h + - training/pango_font_info.cpp + - training/pango_font_info.h + - training/stringrenderer.cpp + - training/stringrenderer.h + - training/tlog.cpp + - training/tlog.h + - training/util.h + - training/icuerrorcode.h + + dependencies: + - unicharset_training + - pvt.cppan.demo.gnome.pango.pangocairo: 1 From 77ff65078c1a0a4892167b0df5634aa48896e9b7 Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Mon, 13 Feb 2017 23:11:30 +0300 Subject: [PATCH 105/132] Backport cmake fixes. --- CMakeLists.txt | 25 +++++++++++++++++++------ appveyor.yml | 27 ++++++++++++++++++++------- training/CMakeLists.txt | 21 +++++++++++++++++---- 3 files changed, 56 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e4bc5cbb9e..95adcf7a02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,19 +46,25 @@ set(VERSION_PLAIN ${VERSION_MAJOR}.${VERSION_MINOR}) set(MINIMUM_LEPTONICA_VERSION 1.71) -if(NOT EXISTS ${PROJECT_SOURCE_DIR}/cppan) +if(NOT EXISTS ${PROJECT_SOURCE_DIR}/.cppan) if (NOT Leptonica_DIR AND NOT MSVC) find_package(PkgConfig REQUIRED) - pkg_check_modules(Leptonica REQUIRED lept) + pkg_check_modules(Leptonica REQUIRED lept>=${MINIMUM_LEPTONICA_VERSION}) else() find_package(Leptonica ${MINIMUM_LEPTONICA_VERSION} REQUIRED CONFIG) endif() else() - add_subdirectory(cppan) + if (STATIC) + set(CPPAN_BUILD_SHARED_LIBS 0) + else() + set(CPPAN_BUILD_SHARED_LIBS 1) + endif() + add_subdirectory(.cppan) endif() find_package(OpenCL QUIET) -find_package(PkgConfig) + +option(BUILD_TRAINING_TOOLS "Build training tools" ON) ############################################################################### # @@ -203,7 +209,11 @@ set(tesseract_src ${tesseract_src} add_library (tesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr}) if (NOT STATIC) -target_compile_definitions (tesseract PUBLIC -DTESS_EXPORTS) +target_compile_definitions (tesseract + PRIVATE 
-DTESS_EXPORTS + INTERFACE -DTESS_IMPORTS +) +set_target_properties (tesseract PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS True) endif() target_link_libraries (tesseract ${LIB_Ws2_32} ${LIB_pthread}) set_target_properties (tesseract PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1}) @@ -217,7 +227,8 @@ if (NOT CPPAN_BUILD) target_link_libraries (tesseract ${Leptonica_LIBRARIES}) export(TARGETS tesseract FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake) else() - target_link_libraries (tesseract cppan) + target_link_libraries (tesseract pvt.cppan.demo.danbloomberg.leptonica) + add_dependencies (tesseract cppan) file(WRITE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake "include(${CMAKE_BINARY_DIR}/cppan.cmake)\n") export(TARGETS tesseract APPEND FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake) endif() @@ -237,6 +248,8 @@ set_target_properties (tesseractmain PROPERTIES OUTPUT_NAME tesseract) ######################################## +if (BUILD_TRAINING_TOOLS) add_subdirectory(training) +endif() ############################################################################### diff --git a/appveyor.yml b/appveyor.yml index 7dc013d30e..b6b7f86457 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -8,18 +8,31 @@ configuration: - Release before_build: - - if %platform%==Win32 set generator=Visual Studio 14 - - if %platform%==Win64 set generator=Visual Studio 14 Win64 + - if %platform%==Win32 set generator=Visual Studio 14 2015 + - if %platform%==Win64 set generator=Visual Studio 14 2015 Win64 - if %platform%==Win32 set vcplatform=Win32 - if %platform%==Win64 set vcplatform=x64 - + - curl -fsS -L -o cppan.zip https://cppan.org/client/cppan-master-Windows-client.zip - 7z x cppan.zip - set PATH=%PATH%;%cd% - + + - cppan # dummy run to create %USERPROFILE%\.cppan\cppan.yml + - ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nvar_check_jobs: 1`n"' + - ps: 'Add-Content $env:USERPROFILE\.cppan\cppan.yml "`n`nbuild_warning_level: 0`n"' + - ps: 'Add-Content 
$env:USERPROFILE\.cppan\cppan.yml "`n`nbuild_system_verbose: false`n"' + build_script: - - cppan - mkdir build - cd build - - cmake .. -G "%generator%" -DSTATIC=1 - - msbuild tesseract.sln /p:Platform=%vcplatform% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" + #- cmd: 'echo local_settings: > cppan.yml' + #- cmd: 'echo generator: %generator% >> cppan.yml' + #- cmd: 'echo use_shared_libs: true >> cppan.yml' + #- cppan --build .. + - cmake .. -G "%generator%" -DBUILD_TRAINING_TOOLS=Off -DAPPVEYOR=1 + - cmake --build . --config Release + +artifacts: + - path: build\bin\Release + #- path: build + name: tesseract-$(APPVEYOR_BUILD_VERSION) \ No newline at end of file diff --git a/training/CMakeLists.txt b/training/CMakeLists.txt index a733e73ea0..4cc2c7553a 100644 --- a/training/CMakeLists.txt +++ b/training/CMakeLists.txt @@ -2,10 +2,16 @@ # tesseract # -if (STATIC OR NOT (WIN32 OR CYGWIN)) +if (NOT CPPAN_BUILD AND NOT (WIN32 OR CYGWIN)) + return() +endif() + +if (CPPAN_BUILD) + set(ICU_FOUND 1) +endif() # experimental -if (MSVC) +if (MSVC AND NOT CPPAN_BUILD) include(CheckTypeSize) check_type_size("void *" SIZEOF_VOID_P) @@ -48,10 +54,12 @@ endif() set(ICU_ROOT ${icu_dir}/icu) -endif(MSVC) +endif() # experimental +if (NOT CPPAN_BUILD) find_package(ICU COMPONENTS uc i18n) +endif() ######################################## # LIBRARY tessopt @@ -165,7 +173,9 @@ project_group (wordlist2dawg "Training Tools") if (ICU_FOUND) +if (NOT CPPAN_BUILD) include_directories(${ICU_INCLUDE_DIRS}) +endif() add_executable (set_unicharset_properties set_unicharset_properties.cpp @@ -177,7 +187,11 @@ add_executable (set_unicharset_properties normstrngs.h icuerrorcode.h ) +if (NOT CPPAN_BUILD) target_link_libraries (set_unicharset_properties common_training ${ICU_LIBRARIES}) +else() +target_link_libraries (set_unicharset_properties common_training pvt.cppan.demo.unicode.icu.i18n) +endif() project_group (set_unicharset_properties "Training Tools") @@ -233,6 +247,5 
@@ project_group (text2image "Training Tools") endif(PKG_CONFIG_FOUND) endif(ICU_FOUND) -endif(STATIC OR NOT (WIN32 OR CYGWIN)) ############################################################################### From 16563b4df3c0a59427cbb40d41bdf15cf9edf9bb Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Wed, 15 Feb 2017 13:29:24 +0300 Subject: [PATCH 106/132] Add .cppan to ignore list. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 022cc693c9..8a3d338b50 100644 --- a/.gitignore +++ b/.gitignore @@ -75,6 +75,7 @@ kernel*.bin # build dirs /build* /cppan +/.cppan /win* *.dll *.exe From db87e210e30d8ce108decaa7568de1a88058555f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Thu, 16 Feb 2017 18:51:45 +0100 Subject: [PATCH 107/132] 3.05.00 release --- ChangeLog | 21 +++++++++++++++++++++ configure.ac | 6 +++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 492d6984c9..2ac86b34c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +2017-02-16 - V3.05.00 + * Made some fine tuning to the hOCR output. + * Added TSV as another optional output format. + * Fixed ABI break introduced in 3.04.00 with the AnalyseLayout() method. + * text2image tool - Enable all OpenType ligatures available in a font. This feature requires Pango 1.38 or newer. + * Training tools - Replaced asserts with tprintf() and exit(1). + * Fixed Cygwin compatibility. + * Improved multipage tiff processing. + * Improved the embedded pdf font (pdf.ttf). + * Enable selection of OCR engine mode from command line. + * Changed tesseract command line parameter '-psm' to '--psm'. + * Added new C API for orientation and script detection, removed the old one. + * Increased minimum autoconf version to 2.59. + * Removed dead code. + * Fixed many compiler warning. + * Fixed memory and resource leaks. + * Fixed some issues with the 'Cube' OCR engine. + * Fixed some openCL issues. 
+ * Added option to build Tesseract with CMake build system. + * Implemented CPPAN support for easy Windows building. + 2016-02-17 - V3.04.01 * Added OSD renderer for psm 0. Works for single page and multi-page images. * Improve tesstrain.sh script. diff --git a/configure.ac b/configure.ac index 0f1198402c..f02cea108e 100644 --- a/configure.ac +++ b/configure.ac @@ -6,7 +6,7 @@ # Initialization # ---------------------------------------- AC_PREREQ([2.59]) -AC_INIT([tesseract], [3.05.00dev], [https://github.com/tesseract-ocr/tesseract/issues]) +AC_INIT([tesseract], [3.05.00], [https://github.com/tesseract-ocr/tesseract/issues]) AC_PROG_CXX([g++ clang++]) AC_LANG([C++]) AC_LANG_COMPILER_REQUIRE @@ -18,8 +18,8 @@ AC_PREFIX_DEFAULT([/usr/local]) # Define date of package, etc. Could be useful in auto-generated # documentation. -PACKAGE_YEAR=2015 -PACKAGE_DATE="07/11" +PACKAGE_YEAR=2017 +PACKAGE_DATE="02/16" abs_top_srcdir=`AS_DIRNAME([$0])` gitrev="`git --git-dir=${abs_top_srcdir}/.git --work-tree=${abs_top_srcdir} describe --always --tags`" From 998d4735d021b8023f46651a08ab26477f264fd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Thu, 16 Feb 2017 18:59:32 +0100 Subject: [PATCH 108/132] 3.05.00 release --- ChangeLog | 21 +++++++++++++++++++++ README.md | 2 +- api/baseapi.h | 2 +- configure.ac | 6 +++--- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 492d6984c9..2ac86b34c5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,24 @@ +2017-02-16 - V3.05.00 + * Made some fine tuning to the hOCR output. + * Added TSV as another optional output format. + * Fixed ABI break introduced in 3.04.00 with the AnalyseLayout() method. + * text2image tool - Enable all OpenType ligatures available in a font. This feature requires Pango 1.38 or newer. + * Training tools - Replaced asserts with tprintf() and exit(1). + * Fixed Cygwin compatibility. + * Improved multipage tiff processing. 
+ * Improved the embedded pdf font (pdf.ttf). + * Enable selection of OCR engine mode from command line. + * Changed tesseract command line parameter '-psm' to '--psm'. + * Added new C API for orientation and script detection, removed the old one. + * Increased minimum autoconf version to 2.59. + * Removed dead code. + * Fixed many compiler warning. + * Fixed memory and resource leaks. + * Fixed some issues with the 'Cube' OCR engine. + * Fixed some openCL issues. + * Added option to build Tesseract with CMake build system. + * Implemented CPPAN support for easy Windows building. + 2016-02-17 - V3.04.01 * Added OSD renderer for psm 0. Works for single page and multi-page images. * Improve tesstrain.sh script. diff --git a/README.md b/README.md index 30af585520..3b04ebc083 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ This project does not include a GUI application. If you need one, please see the You should note that in many cases, in order to get better OCR results, you'll need to [improve the quality](https://github.com/tesseract-ocr/tesseract/wiki/ImproveQuality) of the image you are giving Tesseract. -The latest stable version is 3.04.01, released in February 2016. +The latest stable version is 3.05.00, released in February 2017. 
#Brief history diff --git a/api/baseapi.h b/api/baseapi.h index e70558fbc0..ab51ef20d6 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -20,7 +20,7 @@ #ifndef TESSERACT_API_BASEAPI_H__ #define TESSERACT_API_BASEAPI_H__ -#define TESSERACT_VERSION_STR "3.05.00dev" +#define TESSERACT_VERSION_STR "3.05.00" #define TESSERACT_VERSION 0x030500 #define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \ (patch)) diff --git a/configure.ac b/configure.ac index 0f1198402c..f02cea108e 100644 --- a/configure.ac +++ b/configure.ac @@ -6,7 +6,7 @@ # Initialization # ---------------------------------------- AC_PREREQ([2.59]) -AC_INIT([tesseract], [3.05.00dev], [https://github.com/tesseract-ocr/tesseract/issues]) +AC_INIT([tesseract], [3.05.00], [https://github.com/tesseract-ocr/tesseract/issues]) AC_PROG_CXX([g++ clang++]) AC_LANG([C++]) AC_LANG_COMPILER_REQUIRE @@ -18,8 +18,8 @@ AC_PREFIX_DEFAULT([/usr/local]) # Define date of package, etc. Could be useful in auto-generated # documentation. -PACKAGE_YEAR=2015 -PACKAGE_DATE="07/11" +PACKAGE_YEAR=2017 +PACKAGE_DATE="02/16" abs_top_srcdir=`AS_DIRNAME([$0])` gitrev="`git --git-dir=${abs_top_srcdir}/.git --work-tree=${abs_top_srcdir} describe --always --tags`" From e85a7e2529416a12e806a8df2e9fdf92f7e39a5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Sun, 19 Feb 2017 13:49:22 +0100 Subject: [PATCH 109/132] replace nullptr with NULL to enable non c++11 build (fixes #727) --- ccmain/thresholder.cpp | 2 +- training/stringrenderer.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ccmain/thresholder.cpp b/ccmain/thresholder.cpp index 77069bc9d9..4208c65ba2 100644 --- a/ccmain/thresholder.cpp +++ b/ccmain/thresholder.cpp @@ -184,7 +184,7 @@ void ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) { // We have a binary image, but it still has to be copied, as this API // allows the caller to modify the output. 
Pix* original = GetPixRect(); - *pix = pixCopy(nullptr, original); + *pix = pixCopy(NULL, original); pixDestroy(&original); } else { OtsuThresholdRectToPix(pix_, pix); diff --git a/training/stringrenderer.cpp b/training/stringrenderer.cpp index e7f9699f18..07a0e2003d 100644 --- a/training/stringrenderer.cpp +++ b/training/stringrenderer.cpp @@ -244,7 +244,7 @@ void StringRenderer::SetWordUnderlineAttributes(const string& page_text) { int offset = 0; TRand rand; bool started_underline = false; - PangoAttribute* und_attr = nullptr; + PangoAttribute* und_attr = NULL; while (offset < page_text.length()) { offset += SpanUTF8Whitespace(text + offset); @@ -263,7 +263,7 @@ void StringRenderer::SetWordUnderlineAttributes(const string& page_text) { // previous word. pango_attr_list_insert(attr_list, und_attr); started_underline = false; - und_attr = nullptr; + und_attr = NULL; } } if (!started_underline && RandBool(underline_start_prob_, &rand)) { From 40dc28026b61f4aa6dcb37782ed1df4f1d5ee77a Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Thu, 23 Feb 2017 20:20:05 +0300 Subject: [PATCH 110/132] Rename cppan/cmake targets. 
--- CMakeLists.txt | 31 +++++++++++++++---------------- cppan.yml | 17 ++++++++--------- training/CMakeLists.txt | 14 +++++++------- 3 files changed, 30 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 95adcf7a02..dc5e088b8f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -207,30 +207,30 @@ set(tesseract_src ${tesseract_src} api/pdfrenderer.cpp ) -add_library (tesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr}) +add_library (libtesseract ${LIBRARY_TYPE} ${tesseract_src} ${tesseract_hdr}) if (NOT STATIC) -target_compile_definitions (tesseract +target_compile_definitions (libtesseract PRIVATE -DTESS_EXPORTS INTERFACE -DTESS_IMPORTS ) -set_target_properties (tesseract PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS True) +set_target_properties (libtesseract PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS True) endif() -target_link_libraries (tesseract ${LIB_Ws2_32} ${LIB_pthread}) -set_target_properties (tesseract PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1}) -set_target_properties (tesseract PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1}) +target_link_libraries (libtesseract ${LIB_Ws2_32} ${LIB_pthread}) +set_target_properties (libtesseract PROPERTIES VERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1}) +set_target_properties (libtesseract PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR_0}.${VERSION_MINOR_1}) if (WIN32) -set_target_properties (tesseract PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}) -set_target_properties (tesseract PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d) +set_target_properties (libtesseract PROPERTIES OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}) +set_target_properties (libtesseract PROPERTIES DEBUG_OUTPUT_NAME tesseract${VERSION_MAJOR}${VERSION_MINOR}d) endif() if (NOT CPPAN_BUILD) - target_link_libraries (tesseract ${Leptonica_LIBRARIES}) - export(TARGETS tesseract FILE 
${CMAKE_BINARY_DIR}/TesseractTargets.cmake) + target_link_libraries (libtesseract ${Leptonica_LIBRARIES}) + export(TARGETS libtesseract FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake) else() - target_link_libraries (tesseract pvt.cppan.demo.danbloomberg.leptonica) - add_dependencies (tesseract cppan) + target_link_libraries (libtesseract pvt.cppan.demo.danbloomberg.leptonica) + add_dependencies (libtesseract cppan) file(WRITE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake "include(${CMAKE_BINARY_DIR}/cppan.cmake)\n") - export(TARGETS tesseract APPEND FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake) + export(TARGETS libtesseract APPEND FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake) endif() ######################################## @@ -242,9 +242,8 @@ set(tesseractmain_src vs2010/tesseract/resource.h vs2010/tesseract/tesseract.rc ) -add_executable (tesseractmain ${tesseractmain_src}) -target_link_libraries (tesseractmain tesseract) -set_target_properties (tesseractmain PROPERTIES OUTPUT_NAME tesseract) +add_executable (tesseract ${tesseractmain_src}) +target_link_libraries (tesseract libtesseract) ######################################## diff --git a/cppan.yml b/cppan.yml index 8851c26887..ef38d520c1 100644 --- a/cppan.yml +++ b/cppan.yml @@ -17,7 +17,7 @@ common_settings: c++: 11 projects: - tesseract: + libtesseract: type: lib export_all_symbols: true files: @@ -136,17 +136,17 @@ projects: dependencies: pvt.cppan.demo.danbloomberg.leptonica: 1.74 - tesseractmain: + tesseract: files: api/tesseractmain.cpp dependencies: - - tesseract + - libtesseract tessopt: type: lib static_only: true files: training/tessopt.* include_directories: training - dependencies: tesseract + dependencies: libtesseract common_training: type: lib @@ -162,8 +162,7 @@ projects: ambiguous_words: files: training/ambiguous_words.cpp - dependencies: - - tesseract + dependencies: libtesseract classifier_tester: files: training/classifier_tester.cpp @@ -171,7 +170,7 @@ projects: combine_tessdata: 
files: training/combine_tessdata.cpp - dependencies: tesseract + dependencies: libtesseract cntraining: files: training/cntraining.cpp @@ -179,7 +178,7 @@ projects: dawg2wordlist: files: training/dawg2wordlist.cpp - dependencies: tesseract + dependencies: libtesseract mftraining: files: @@ -197,7 +196,7 @@ projects: wordlist2dawg: files: training/wordlist2dawg.cpp - dependencies: tesseract + dependencies: libtesseract unicharset_training: type: lib diff --git a/training/CMakeLists.txt b/training/CMakeLists.txt index 4cc2c7553a..c13ed77479 100644 --- a/training/CMakeLists.txt +++ b/training/CMakeLists.txt @@ -82,7 +82,7 @@ set(common_training_hdr commontraining.h ) add_library (common_training ${common_training_src} ${common_training_hdr}) -target_link_libraries (common_training tesseract tessopt) +target_link_libraries (common_training libtesseract tessopt) project_group (common_training "Training Tools") @@ -91,7 +91,7 @@ project_group (common_training "Training Tools") ######################################## add_executable (ambiguous_words ambiguous_words.cpp) -target_link_libraries (ambiguous_words tesseract) +target_link_libraries (ambiguous_words libtesseract) project_group (ambiguous_words "Training Tools") @@ -109,7 +109,7 @@ project_group (classifier_tester "Training Tools") ######################################## add_executable (combine_tessdata combine_tessdata.cpp) -target_link_libraries (combine_tessdata tesseract) +target_link_libraries (combine_tessdata libtesseract) project_group (combine_tessdata "Training Tools") @@ -127,7 +127,7 @@ project_group (cntraining "Training Tools") ######################################## add_executable (dawg2wordlist dawg2wordlist.cpp) -target_link_libraries (dawg2wordlist tesseract) +target_link_libraries (dawg2wordlist libtesseract) project_group (dawg2wordlist "Training Tools") @@ -154,7 +154,7 @@ project_group (shapeclustering "Training Tools") ######################################## add_executable 
(unicharset_extractor unicharset_extractor.cpp) -target_link_libraries (unicharset_extractor tesseract tessopt) +target_link_libraries (unicharset_extractor libtesseract tessopt) project_group (unicharset_extractor "Training Tools") @@ -163,7 +163,7 @@ project_group (unicharset_extractor "Training Tools") ######################################## add_executable (wordlist2dawg wordlist2dawg.cpp) -target_link_libraries (wordlist2dawg tesseract) +target_link_libraries (wordlist2dawg libtesseract) project_group (wordlist2dawg "Training Tools") @@ -235,7 +235,7 @@ endif() add_executable (text2image ${text2image_src}) target_include_directories (text2image BEFORE PRIVATE ${Cairo_INCLUDE_DIRS} ${Pango_INCLUDE_DIRS}) target_compile_definitions (text2image PRIVATE -DPANGO_ENABLE_ENGINE) -target_link_libraries (text2image tesseract common_training +target_link_libraries (text2image libtesseract common_training ${ICU_LIBRARIES} ${Pango_LIBRARIES} ${Cairo_LIBRARIES} From 183ee5668bee670500a6bfaf5024c2a5171a3101 Mon Sep 17 00:00:00 2001 From: Mikhail Solomennik Date: Wed, 1 Mar 2017 14:41:17 +0300 Subject: [PATCH 111/132] Correct reading config files with \r\n --- ccutil/params.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccutil/params.cpp b/ccutil/params.cpp index c8dd3514e5..30604f1129 100644 --- a/ccutil/params.cpp +++ b/ccutil/params.cpp @@ -72,7 +72,7 @@ bool ParamUtils::ReadParamsFromFp(FILE *fp, inT64 end_offset, while ((end_offset < 0 || ftell(fp) < end_offset) && fgets(line, MAX_PATH, fp)) { - if (line[0] != '\n' && line[0] != '#') { + if (line[0] != '\r' && line[0] != '\n' && line[0] != '#') { chomp_string(line); // remove newline for (valptr = line; *valptr && *valptr != ' ' && *valptr != '\t'; valptr++); From 6fbd9d264312433deed2515848e4741e3184942c Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 22 Dec 2016 22:29:54 +0100 Subject: [PATCH 112/132] Use camel case for GitHub in README.md Signed-off-by: Stefan Weil --- README.md | 3 ++- 1 
file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3b04ebc083..04c5112fce 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,8 @@ For the latest online version of the README.md see: This package contains an OCR engine - `libtesseract` and a command line program - `tesseract`. The lead developer is Ray Smith. The maintainer is Zdenko Podobny. -For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/master/AUTHORS) and github's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors). +For a list of contributors see [AUTHORS](https://github.com/tesseract-ocr/tesseract/blob/master/AUTHORS) +and GitHub's log of [contributors](https://github.com/tesseract-ocr/tesseract/graphs/contributors). Tesseract has unicode (UTF-8) support, and can recognize more than 100 languages "out of the box". It can be trained to recognize other languages. See [Tesseract Training](https://github.com/tesseract-ocr/tesseract/wiki/TrainingTesseract) for more information. From 697c3dc4daf8d7921dba46a5393edc8e4b6a8111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 10 Mar 2017 11:03:12 +0100 Subject: [PATCH 113/132] Fix indentation after conditional [-Wmisleading-indentation] The indentation is wrong since commit fd0683f9e03934bbdf7fbebb4d21d64c37b68bc0 and results in a gcc warning: api/baseapi.cpp: In member function 'bool tesseract::TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8*, size_t, const char*, const char*, int, tesseract::TessResultRenderer*, int)': api/baseapi.cpp:986:5: warning: this 'if' clause does not guard... [-Wmisleading-indentation] if (tessedit_page_number >= 0) ^~ api/baseapi.cpp:988:7: note: ...this statement, but the latter is misleadingly indented as if it is guarded by the 'if' pix = (data) ? 
pixReadMemFromMultipageTiff(data, size, &offset) ^~~ Signed-off-by: Stefan Weil --- api/baseapi.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/api/baseapi.cpp b/api/baseapi.cpp index 75b5cb393f..d552b8d1c3 100644 --- a/api/baseapi.cpp +++ b/api/baseapi.cpp @@ -1039,9 +1039,8 @@ bool TessBaseAPI::ProcessPagesMultipageTiff(const l_uint8 *data, od.pixReadTiffCl(filename, page); } else { #endif // USE_OPENCL - pix = (data) ? - pixReadMemFromMultipageTiff(data, size, &offset) : - pixReadFromMultipageTiff(filename, &offset); + pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset) + : pixReadFromMultipageTiff(filename, &offset); #ifdef USE_OPENCL } #endif // USE_OPENCL From efb89f8133c0526693efc2cf42ae79ef4397dcae Mon Sep 17 00:00:00 2001 From: Bryce Glover Date: Tue, 7 Mar 2017 16:23:47 -0500 Subject: [PATCH 114/132] [`autogen.sh`:] Abstract the absolute path of `libtoolize` or `glibtoolize` away into `$LIBTOOLIZE`. Increase portability by insulating `autogen.sh` from platform variance. --- autogen.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/autogen.sh b/autogen.sh index a551bb4b4d..a0e7d3fbdf 100755 --- a/autogen.sh +++ b/autogen.sh @@ -46,6 +46,18 @@ if [ "$1" = "clean" ]; then find . -iname "Makefile.in" -type f -exec rm '{}' + fi +# Prevent any errors that might result from failing to properly invoke `libtoolize` or `glibtoolize,` whichever +# is present on your system, from occurring by testing for its existence and capturing the absolute path to its +# location for caching purposes prior to using it later on in 'Step 2:' +if command -v libtoolize >/dev/null 2>&1; then + LIBTOOLIZE="$(command -v libtoolize)" +elif command -v glibtoolize >/dev/null 2>&1; then + LIBTOOLIZE="$(command -v glibtoolize)" +else + echo "Unable to find a valid copy of libtoolize in your PATH!" + bail_out +fi + # create m4 directory if it not exists if [ ! 
-d m4 ]; then mkdir m4 @@ -71,8 +83,8 @@ aclocal -I config || bail_out # --- Step 2: echo "Running libtoolize" -libtoolize -f -c || glibtoolize -f -c || bail_out -libtoolize --automake || glibtoolize --automake || bail_out +$LIBTOOLIZE -f -c || bail_out +$LIBTOOLIZE --automake || bail_out # --- Step 3: Generate config.h.in from: # . configure.ac (look for AM_CONFIG_HEADER tag or AC_CONFIG_HEADER tag) From ed49959d2a599e0ef7faf9087a9c16bf292c751d Mon Sep 17 00:00:00 2001 From: Bryce Glover Date: Thu, 9 Mar 2017 15:28:11 -0500 Subject: [PATCH 115/132] [`autogen.sh`:] Reduce in-script comment block width to 80 characters. This is with respect to the comment preceding the `libtoolize`/`glibtoolize` existence check I introduced. --- autogen.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/autogen.sh b/autogen.sh index a0e7d3fbdf..db1008720c 100755 --- a/autogen.sh +++ b/autogen.sh @@ -46,9 +46,10 @@ if [ "$1" = "clean" ]; then find . -iname "Makefile.in" -type f -exec rm '{}' + fi -# Prevent any errors that might result from failing to properly invoke `libtoolize` or `glibtoolize,` whichever -# is present on your system, from occurring by testing for its existence and capturing the absolute path to its -# location for caching purposes prior to using it later on in 'Step 2:' +# Prevent any errors that might result from failing to properly invoke +# `libtoolize` or `glibtoolize,` whichever is present on your system, +# from occurring by testing for its existence and capturing the absolute path to +# its location for caching purposes prior to using it later on in 'Step 2:' if command -v libtoolize >/dev/null 2>&1; then LIBTOOLIZE="$(command -v libtoolize)" elif command -v glibtoolize >/dev/null 2>&1; then From 3671de002dcea069f2229341439c04400b11d400 Mon Sep 17 00:00:00 2001 From: Bryce Glover Date: Thu, 9 Mar 2017 15:32:17 -0500 Subject: [PATCH 116/132] [`autogen.sh`:] Clarify `libtoolize`/`glibtoolize` existence check error message. 
Explicitly mention the latter variant of the tool inside said error message. --- autogen.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogen.sh b/autogen.sh index db1008720c..897a383515 100755 --- a/autogen.sh +++ b/autogen.sh @@ -55,7 +55,7 @@ if command -v libtoolize >/dev/null 2>&1; then elif command -v glibtoolize >/dev/null 2>&1; then LIBTOOLIZE="$(command -v glibtoolize)" else - echo "Unable to find a valid copy of libtoolize in your PATH!" + echo "Unable to find a valid copy of libtoolize or glibtoolize in your PATH!" bail_out fi From d9a9876a7024955ee91ded14dce24c86e2e75e14 Mon Sep 17 00:00:00 2001 From: Bryce Glover Date: Thu, 9 Mar 2017 15:35:17 -0500 Subject: [PATCH 117/132] [`autogen.sh`:] Improve `libtoolize` invocation message. Use the `$LIBTOOLIZE` variable inside the message to abstract over the two possible variants of the tool which can be invoked. --- autogen.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autogen.sh b/autogen.sh index 897a383515..5319afbc86 100755 --- a/autogen.sh +++ b/autogen.sh @@ -83,7 +83,7 @@ aclocal -I config || bail_out # --- Step 2: -echo "Running libtoolize" +echo "Running $LIBTOOLIZE" $LIBTOOLIZE -f -c || bail_out $LIBTOOLIZE --automake || bail_out From 35a93348c0c18a5f82982662c2c40dbbb6feaf25 Mon Sep 17 00:00:00 2001 From: Leo Arias Date: Fri, 3 Feb 2017 06:37:39 +0000 Subject: [PATCH 118/132] Add the packaging metadata to build the tesseract snap --- snap/snapcraft.yaml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 snap/snapcraft.yaml diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml new file mode 100644 index 0000000000..4dd68d4a44 --- /dev/null +++ b/snap/snapcraft.yaml @@ -0,0 +1,34 @@ +name: tesseract +version: master +summary: open source optical character recognition engine +description: | + Tesseract has unicode (UTF-8) support, and can recognize more than 100 + languages "out of the box". 
It can be trained to recognize other languages. + Tesseract supports various output formats: plain-text, hocr(html), pdf. + +grade: devel # must be 'stable' to release into candidate/stable channels +confinement: strict + +apps: + tesseract: + command: env TESSDATA_PREFIX=$SNAP_USER_COMMON tesseract + plugs: [home] + +parts: + tesseract: + source: . + plugin: autotools + build-packages: + - autoconf-archive + - pkg-config + - libpng12-dev + - libjpeg8-dev + - libtiff5-dev + - zlib1g-dev + - libicu-dev + - libpango1.0-dev + - libcairo2-dev + after: [leptonica] + leptonica: + source: http://www.leptonica.org/source/leptonica-1.74.1.tar.gz + plugin: autotools From 6fcfcc85ac58fc81adf008ae5be088f2f7a4ad6a Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 10 Mar 2017 11:51:16 +0100 Subject: [PATCH 119/132] Use portable data types #709 --- ccstruct/fontinfo.cpp | 2 +- ccstruct/fontinfo.h | 4 ++-- classify/adaptive.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ccstruct/fontinfo.cpp b/ccstruct/fontinfo.cpp index d3e6f3756e..536ac280de 100644 --- a/ccstruct/fontinfo.cpp +++ b/ccstruct/fontinfo.cpp @@ -241,7 +241,7 @@ bool read_set(FILE* f, FontSet* fs, bool swap) { if (fread(&fs->size, sizeof(fs->size), 1, f) != 1) return false; if (swap) Reverse32(&fs->size); - fs->configs = new int[fs->size]; + fs->configs = new int32_t[fs->size]; for (int i = 0; i < fs->size; ++i) { if (fread(&fs->configs[i], sizeof(fs->configs[i]), 1, f) != 1) return false; if (swap) diff --git a/ccstruct/fontinfo.h b/ccstruct/fontinfo.h index 5f2d420852..1835d72ec6 100644 --- a/ccstruct/fontinfo.h +++ b/ccstruct/fontinfo.h @@ -135,8 +135,8 @@ struct FontInfo { // the FontInfo in the FontSet structure, it's better to share FontInfos among // FontSets (Classify::fontinfo_table_). 
struct FontSet { - int size; - int* configs; // FontInfo ids + int32_t size; + int32_t* configs; // FontInfo ids }; // Class that adds a bit of functionality on top of GenericVector to diff --git a/classify/adaptive.cpp b/classify/adaptive.cpp index 019befb4f6..a713fedc27 100644 --- a/classify/adaptive.cpp +++ b/classify/adaptive.cpp @@ -311,8 +311,8 @@ void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates) { * @note History: Tue Mar 19 14:11:01 1991, DSJ, Created. */ ADAPT_CLASS ReadAdaptedClass(FILE *File) { - int NumTempProtos; - int NumConfigs; + int32_t NumTempProtos; + int32_t NumConfigs; int i; ADAPT_CLASS Class; TEMP_PROTO TempProto; @@ -330,7 +330,7 @@ ADAPT_CLASS ReadAdaptedClass(FILE *File) { WordsInVectorOfSize (MAX_NUM_CONFIGS), File); /* then read in the list of temporary protos */ - fread ((char *) &NumTempProtos, sizeof (int), 1, File); + fread (&NumTempProtos, sizeof(NumTempProtos), 1, File); Class->TempProtos = NIL_LIST; for (i = 0; i < NumTempProtos; i++) { TempProto = @@ -341,7 +341,7 @@ ADAPT_CLASS ReadAdaptedClass(FILE *File) { } /* then read in the adapted configs */ - fread ((char *) &NumConfigs, sizeof (int), 1, File); + fread (&NumConfigs, sizeof(NumConfigs), 1, File); for (i = 0; i < NumConfigs; i++) if (test_bit (Class->PermConfigs, i)) Class->Config[i].Perm = ReadPermConfig (File); From ec8d52b8969eef62002a18e4f1af394854149f0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 10 Mar 2017 13:28:43 +0100 Subject: [PATCH 120/132] fix --disable-graphics build --- ccstruct/imagedata.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ccstruct/imagedata.cpp b/ccstruct/imagedata.cpp index 0c3f7cab78..86a2d74deb 100644 --- a/ccstruct/imagedata.cpp +++ b/ccstruct/imagedata.cpp @@ -438,7 +438,9 @@ void DocumentData::LoadPageInBackground(int index) { if (pages_offset_ == index) return; pages_offset_ = index; pages_.clear(); + #ifndef GRAPHICS_DISABLED SVSync::StartThread(ReCachePagesFunc, this); + 
#endif // GRAPHICS_DISABLED } // Returns a pointer to the page with the given index, modulo the total From 2c203e60591dcd665e397e6fee4cf009ad7e3cc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 10 Mar 2017 23:17:30 +0100 Subject: [PATCH 121/132] fix --enable-visibility build (including training tools) --- ccmain/cubeclassifier.h | 5 +++-- ccstruct/ratngs.h | 2 +- ccutil/ccutil.h | 2 +- ccutil/globaloc.h | 2 +- ccutil/params.h | 4 ++-- ccutil/unicharset.h | 8 ++++---- classify/cluster.h | 10 +++++----- classify/featdefs.h | 6 +++--- classify/intfeaturemap.h | 2 +- classify/intfeaturespace.h | 2 +- classify/intfx.h | 2 +- classify/kdtree.h | 2 +- classify/mastertrainer.h | 4 ++-- classify/ocrfeatures.h | 2 +- classify/protos.h | 4 ++-- classify/shapetable.h | 2 +- classify/tessclassifier.h | 3 ++- dict/dict.h | 2 +- 18 files changed, 33 insertions(+), 31 deletions(-) diff --git a/ccmain/cubeclassifier.h b/ccmain/cubeclassifier.h index 98bdb5cf00..3ae00f7974 100644 --- a/ccmain/cubeclassifier.h +++ b/ccmain/cubeclassifier.h @@ -23,6 +23,7 @@ #define THIRD_PARTY_TESSERACT_CCMAIN_CUBECLASSIFIER_H_ #include "shapeclassifier.h" +#include "platform.h" namespace tesseract { @@ -35,7 +36,7 @@ class TrainingSample; struct UnicharRating; // Cube implementation of a ShapeClassifier. -class CubeClassifier : public ShapeClassifier { +class TESS_API CubeClassifier : public ShapeClassifier { public: explicit CubeClassifier(Tesseract* tesseract); virtual ~CubeClassifier(); @@ -55,7 +56,7 @@ class CubeClassifier : public ShapeClassifier { }; // Combination of Tesseract class pruner with scoring by cube. 
-class CubeTessClassifier : public ShapeClassifier { +class TESS_API CubeTessClassifier : public ShapeClassifier { public: explicit CubeTessClassifier(Tesseract* tesseract); virtual ~CubeTessClassifier(); diff --git a/ccstruct/ratngs.h b/ccstruct/ratngs.h index 446dfc6c5d..2ee9c94a30 100644 --- a/ccstruct/ratngs.h +++ b/ccstruct/ratngs.h @@ -268,7 +268,7 @@ const char *ScriptPosToString(tesseract::ScriptPos script_pos); } // namespace tesseract. -class WERD_CHOICE : public ELIST_LINK { +class TESS_API WERD_CHOICE : public ELIST_LINK { public: static const float kBadRating; static const char *permuter_name(uinT8 permuter); diff --git a/ccutil/ccutil.h b/ccutil/ccutil.h index e4ec4f9ae8..faba0aef06 100644 --- a/ccutil/ccutil.h +++ b/ccutil/ccutil.h @@ -49,7 +49,7 @@ class CCUtilMutex { }; -class CCUtil { +class TESS_API CCUtil { public: CCUtil(); virtual ~CCUtil(); diff --git a/ccutil/globaloc.h b/ccutil/globaloc.h index 41438194c8..60d6b73ae2 100644 --- a/ccutil/globaloc.h +++ b/ccutil/globaloc.h @@ -29,7 +29,7 @@ void SavePixForCrash(int resolution, Pix* pix); void signal_exit(int signal_code); -void err_exit(); +void TESS_API err_exit(); void set_global_loc_code(int loc_code); diff --git a/ccutil/params.h b/ccutil/params.h index d49ce3ff8a..c168dbe4c6 100644 --- a/ccutil/params.h +++ b/ccutil/params.h @@ -55,7 +55,7 @@ class ParamUtils { // ORed or ANDed with any current values. // Blank lines and lines beginning # are ignored. // Values may have any whitespace after the name and are the rest of line. - static bool ReadParamsFile( + static bool TESS_API ReadParamsFile( const char *file, // filename to read SetParamConstraint constraint, ParamsVectors *member_params); @@ -252,7 +252,7 @@ class DoubleParam : public Param { // // TODO(daria): remove GlobalParams() when all global Tesseract // parameters are converted to members. 
-tesseract::ParamsVectors *GlobalParams(); +tesseract::ParamsVectors TESS_API *GlobalParams(); /************************************************************************* * Note on defining parameters. diff --git a/ccutil/unicharset.h b/ccutil/unicharset.h index 535a274ac7..eb9f463068 100644 --- a/ccutil/unicharset.h +++ b/ccutil/unicharset.h @@ -177,7 +177,7 @@ class UNICHARSET { // Return the UNICHAR_ID of a given unichar representation within the // UNICHARSET. - UNICHAR_ID unichar_to_id(const char* const unichar_repr) const; + UNICHAR_ID TESS_API unichar_to_id(const char* const unichar_repr) const; // Return the UNICHAR_ID of a given unichar representation within the // UNICHARSET. Only the first length characters from unichar_repr are used. @@ -234,7 +234,7 @@ class UNICHARSET { } // Add a unichar representation to the set. - void unichar_insert(const char* const unichar_repr); + void TESS_API unichar_insert(const char* const unichar_repr); // Return true if the given unichar id exists within the set. // Relies on the fact that unichar ids are contiguous in the unicharset. @@ -244,7 +244,7 @@ class UNICHARSET { } // Return true if the given unichar representation exists within the set. - bool contains_unichar(const char* const unichar_repr) const; + bool TESS_API contains_unichar(const char* const unichar_repr) const; bool contains_unichar(const char* const unichar_repr, int length) const; // Return true if the given unichar representation corresponds to the given @@ -327,7 +327,7 @@ class UNICHARSET { // Saves the content of the UNICHARSET to the given STRING. // Returns true if the operation is successful. - bool save_to_string(STRING *str) const; + bool TESS_API save_to_string(STRING *str) const; // Load a unicharset from a unicharset file that has been loaded into // the given memory buffer. 
diff --git a/classify/cluster.h b/classify/cluster.h index 53ddf87dad..a068a5d58c 100644 --- a/classify/cluster.h +++ b/classify/cluster.h @@ -107,15 +107,15 @@ typedef struct { /*-------------------------------------------------------------------------- Public Function Prototypes --------------------------------------------------------------------------*/ -CLUSTERER *MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]); +CLUSTERER TESS_API *MakeClusterer (inT16 SampleSize, const PARAM_DESC ParamDesc[]); -SAMPLE *MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, inT32 CharID); +SAMPLE TESS_API *MakeSample(CLUSTERER * Clusterer, const FLOAT32* Feature, inT32 CharID); LIST ClusterSamples(CLUSTERER *Clusterer, CLUSTERCONFIG *Config); -void FreeClusterer(CLUSTERER *Clusterer); +void TESS_API FreeClusterer(CLUSTERER *Clusterer); -void FreeProtoList(LIST *ProtoList); +void TESS_API FreeProtoList(LIST *ProtoList); void FreePrototype(void *arg); // PROTOTYPE *Prototype); @@ -125,7 +125,7 @@ FLOAT32 Mean(PROTOTYPE *Proto, uinT16 Dimension); FLOAT32 StandardDeviation(PROTOTYPE *Proto, uinT16 Dimension); -inT32 MergeClusters(inT16 N, PARAM_DESC ParamDesc[], inT32 n1, inT32 n2, +inT32 TESS_API MergeClusters(inT16 N, PARAM_DESC ParamDesc[], inT32 n1, inT32 n2, FLOAT32 m[], FLOAT32 m1[], FLOAT32 m2[]); //--------------Global Data Definitions and Declarations--------------------------- diff --git a/classify/featdefs.h b/classify/featdefs.h index 7c168f3daa..eb0c71ce45 100644 --- a/classify/featdefs.h +++ b/classify/featdefs.h @@ -55,7 +55,7 @@ typedef FEATURE_DEFS_STRUCT *FEATURE_DEFS; /*---------------------------------------------------------------------- Generic functions for manipulating character descriptions ----------------------------------------------------------------------*/ -void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs); +void TESS_API InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs); void FreeCharDescription(CHAR_DESC CharDesc); @@ -67,10 
+67,10 @@ bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, void WriteCharDescription(const FEATURE_DEFS_STRUCT& FeatureDefs, CHAR_DESC CharDesc, STRING* str); -CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, +CHAR_DESC TESS_API ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File); -int ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, +int TESS_API ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName); /**---------------------------------------------------------------------------- diff --git a/classify/intfeaturemap.h b/classify/intfeaturemap.h index 55c5b5cf5e..93bc896c0e 100644 --- a/classify/intfeaturemap.h +++ b/classify/intfeaturemap.h @@ -75,7 +75,7 @@ class IntFeatureMap { // Copies the given feature_space and uses it as the index feature map // from INT_FEATURE_STRUCT. - void Init(const IntFeatureSpace& feature_space); + void TESS_API Init(const IntFeatureSpace& feature_space); // Helper to return an offset index feature. In this context an offset // feature with a dir of +/-1 is a feature of a similar direction, diff --git a/classify/intfeaturespace.h b/classify/intfeaturespace.h index e1e8e6ec9b..b72c42bdd1 100644 --- a/classify/intfeaturespace.h +++ b/classify/intfeaturespace.h @@ -35,7 +35,7 @@ class IndexMap; // Down-sampling quantization of the INT_FEATURE_STRUCT feature space and // conversion to a single scalar index value, used as a binary feature space. -class IntFeatureSpace { +class TESS_API IntFeatureSpace { public: IntFeatureSpace(); // Default copy constructors and assignment OK! 
diff --git a/classify/intfx.h b/classify/intfx.h index 26c435374c..1cd51a3022 100644 --- a/classify/intfx.h +++ b/classify/intfx.h @@ -48,7 +48,7 @@ const double kStandardFeatureLength = 64.0 / 5; /**---------------------------------------------------------------------------- Public Function Prototypes ----------------------------------------------------------------------------**/ -void InitIntegerFX(); +void TESS_API InitIntegerFX(); // Returns a vector representing the direction of a feature with the given // theta direction in an INT_FEATURE_STRUCT. diff --git a/classify/kdtree.h b/classify/kdtree.h index 1294ea3c6a..259d7078d0 100644 --- a/classify/kdtree.h +++ b/classify/kdtree.h @@ -83,7 +83,7 @@ void FreeKDNode(KDNODE *Node); FLOAT32 DistanceSquared(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]); -FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]); +FLOAT32 TESS_API ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[]); int QueryInSearch(KDTREE *tree); diff --git a/classify/mastertrainer.h b/classify/mastertrainer.h index 8cc7158acf..0bdc760648 100644 --- a/classify/mastertrainer.h +++ b/classify/mastertrainer.h @@ -66,7 +66,7 @@ struct ShapeDist { // Initially supports shape clustering and mftrainining. // Other important features of the MasterTrainer are conditioning the data // by outlier elimination, replication with perturbation, and serialization. -class MasterTrainer { +class TESS_API MasterTrainer { public: MasterTrainer(NormalizationMode norm_mode, bool shape_analysis, bool replicate_samples, int debug_level); @@ -298,7 +298,7 @@ class MasterTrainer { int debug_level_; // Feature map used to construct reduced feature spaces for compact // classifiers. - IntFeatureMap feature_map_; + IntFeatureMap TESS_API feature_map_; // Vector of Pix pointers used for classifiers that need the image. // Indexed by page_num_ in the samples. // These images are owned by the trainer and need to be pixDestroyed. 
diff --git a/classify/ocrfeatures.h b/classify/ocrfeatures.h index 31a4794ca6..ae00e5a834 100644 --- a/classify/ocrfeatures.h +++ b/classify/ocrfeatures.h @@ -108,7 +108,7 @@ BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature); void FreeFeature(FEATURE Feature); -void FreeFeatureSet(FEATURE_SET FeatureSet); +void TESS_API FreeFeatureSet(FEATURE_SET FeatureSet); FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc); diff --git a/classify/protos.h b/classify/protos.h index bb2f56b59f..7d36447915 100644 --- a/classify/protos.h +++ b/classify/protos.h @@ -166,13 +166,13 @@ void CopyProto(PROTO Src, PROTO Dest); void FillABC(PROTO Proto); -void FreeClass(CLASS_TYPE Class); +void TESS_API FreeClass(CLASS_TYPE Class); void FreeClassFields(CLASS_TYPE Class); void InitPrototypes(); -CLASS_TYPE NewClass(int NumProtos, int NumConfigs); +CLASS_TYPE TESS_API NewClass(int NumProtos, int NumConfigs); void PrintProtos(CLASS_TYPE Class); diff --git a/classify/shapetable.h b/classify/shapetable.h index d8faae8817..59c3fbc6df 100644 --- a/classify/shapetable.h +++ b/classify/shapetable.h @@ -261,7 +261,7 @@ class Shape { // that the shape represents. // Each UnicharAndFonts also lists the fonts of the unichar_id that were // mapped to the shape during training. -class ShapeTable { +class TESS_API ShapeTable { public: ShapeTable(); // The UNICHARSET reference supplied here, or in set_unicharset below must diff --git a/classify/tessclassifier.h b/classify/tessclassifier.h index 57a04861e2..fcf1870492 100644 --- a/classify/tessclassifier.h +++ b/classify/tessclassifier.h @@ -23,6 +23,7 @@ #define THIRD_PARTY_TESSERACT_CLASSIFY_TESSCLASSIFIER_H_ #include "shapeclassifier.h" +#include "platform.h" namespace tesseract { @@ -33,7 +34,7 @@ class TrainingSample; // Due to limitations in the content of TrainingSample, this currently // only works for the static classifier and only works if the ShapeTable // in classify is not NULL. 
-class TessClassifier : public ShapeClassifier { +class TESS_API TessClassifier : public ShapeClassifier { public: TessClassifier(bool pruner_only, tesseract::Classify* classify) : pruner_only_(pruner_only), classify_(classify) {} diff --git a/dict/dict.h b/dict/dict.h index 31d653af9c..a3883b5124 100644 --- a/dict/dict.h +++ b/dict/dict.h @@ -244,7 +244,7 @@ class Dict { CHAR_FRAGMENT_INFO *char_frag_info); /* stopper.cpp *************************************************************/ - bool NoDangerousAmbig(WERD_CHOICE *BestChoice, + bool TESS_API NoDangerousAmbig(WERD_CHOICE *BestChoice, DANGERR *fixpt, bool fix_replaceable, MATRIX* ratings); From 58062590ae47d33144f9beed0b7b513142d5fcf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 10 Mar 2017 23:21:31 +0100 Subject: [PATCH 122/132] Fix some typos in comments (found by codespell) Signed-off-by: Stefan Weil --- textord/scanedg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/textord/scanedg.cpp b/textord/scanedg.cpp index dbb3b66298..0430843619 100644 --- a/textord/scanedg.cpp +++ b/textord/scanedg.cpp @@ -335,7 +335,7 @@ void join_edges(CRACKEDGE *edge1, // edges to join if (edge1->pos.x() + edge1->stepx != edge2->pos.x() || edge1->pos.y() + edge1->stepy != edge2->pos.y()) { CRACKEDGE *tempedge = edge1; - edge1 = edge2; // swap araound + edge1 = edge2; // swap around edge2 = tempedge; } From a692cca65b84997f8e4a3e84c21c14dd5c612ec5 Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Tue, 14 Mar 2017 11:12:40 +0300 Subject: [PATCH 123/132] Update appveyor.yml --- appveyor.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index b6b7f86457..909299f190 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -24,15 +24,17 @@ before_build: build_script: - mkdir build + - mkdir build\bin + - mkdir build\bin\Release - cd build #- cmd: 'echo local_settings: > cppan.yml' #- cmd: 'echo generator: %generator% >> cppan.yml' #- cmd: 
'echo use_shared_libs: true >> cppan.yml' #- cppan --build .. - cmake .. -G "%generator%" -DBUILD_TRAINING_TOOLS=Off -DAPPVEYOR=1 - - cmake --build . --config Release + - cmake --build . --config Release > bin\Release\log.txt 2>&1 artifacts: - path: build\bin\Release #- path: build - name: tesseract-$(APPVEYOR_BUILD_VERSION) \ No newline at end of file + name: tesseract-$(APPVEYOR_BUILD_VERSION) From ffea3faaf2f18b9373cbe635d13dc1535aa9b776 Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Tue, 14 Mar 2017 22:54:01 +0300 Subject: [PATCH 124/132] Disable warnings on Appveyor. --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index dc5e088b8f..a141d50ddd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,9 @@ if (WIN32) add_definitions(-D_CRT_SECURE_NO_WARNINGS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") + if (APPVEYOR) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W0") + endif() endif() set(LIB_Ws2_32 Ws2_32) From eea6d675675fdb7b31c88c0b26a7f99c7625b5f1 Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Wed, 15 Mar 2017 01:01:13 +0300 Subject: [PATCH 125/132] Update CMakeLists.txt --- CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a141d50ddd..68473fb4ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -231,7 +231,6 @@ if (NOT CPPAN_BUILD) export(TARGETS libtesseract FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake) else() target_link_libraries (libtesseract pvt.cppan.demo.danbloomberg.leptonica) - add_dependencies (libtesseract cppan) file(WRITE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake "include(${CMAKE_BINARY_DIR}/cppan.cmake)\n") export(TARGETS libtesseract APPEND FILE ${CMAKE_BINARY_DIR}/TesseractTargets.cmake) endif() From f30cac479d1e1f857528da57ba82977b830db87a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Fri, 17 Mar 2017 20:44:37 +0100 Subject: [PATCH 126/132] libtiff is needed for windows build of tesseract 
executable --- api/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/Makefile.am b/api/Makefile.am index d8c1e54cda..df4bcd6368 100644 --- a/api/Makefile.am +++ b/api/Makefile.am @@ -82,7 +82,7 @@ tesseract_LDADD = libtesseract.la tesseract_LDFLAGS = $(OPENCL_LDFLAGS) if T_WIN -tesseract_LDADD += -lws2_32 +tesseract_LDADD += -lws2_32 -ltiff libtesseract_la_LDFLAGS += -no-undefined -Wl,--as-needed -lws2_32 endif if ADD_RT From 4925e6c44904c42df95a0c0040fca9d1b1e3c794 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Mon, 20 Mar 2017 20:08:41 +0000 Subject: [PATCH 127/132] Add item to ChangeLog for options writing to stdout instead of stderr --- ChangeLog | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog b/ChangeLog index 2ac86b34c5..4836aeca73 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,7 @@ * Improved the embedded pdf font (pdf.ttf). * Enable selection of OCR engine mode from command line. * Changed tesseract command line parameter '-psm' to '--psm'. + * Write output of tesseract --help, --version and --list-langs to stdout instead of stderr. * Added new C API for orientation and script detection, removed the old one. * Increased minimum autoconf version to 2.59. * Removed dead code. From 09acc4f487406c35f2ca782da20a1d7f598715a8 Mon Sep 17 00:00:00 2001 From: Wilson Mar Date: Sat, 18 Mar 2017 06:13:53 -0400 Subject: [PATCH 128/132] Update README.md heading markdown --- README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 04c5112fce..9dbdd8d2aa 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ For the latest online version of the README.md see: https://github.com/tesseract-ocr/tesseract/blob/master/README.md -#About +# About This package contains an OCR engine - `libtesseract` and a command line program - `tesseract`. 
@@ -24,7 +24,7 @@ You should note that in many cases, in order to get better OCR results, you'll n The latest stable version is 3.05.00, released in February 2017. -#Brief history +# Brief history Tesseract was originally developed at Hewlett-Packard Laboratories Bristol and at Hewlett-Packard Co, Greeley Colorado between 1985 and 1994, with some @@ -34,13 +34,13 @@ In 2005 Tesseract was open sourced by HP. Since 2006 it is developed by Google. [Release Notes](https://github.com/tesseract-ocr/tesseract/wiki/ReleaseNotes) -#For developers +# For developers Developers can use `libtesseract` [C](https://github.com/tesseract-ocr/tesseract/blob/master/api/capi.h) or [C++](https://github.com/tesseract-ocr/tesseract/blob/master/api/baseapi.h) API to build their own application. If you need bindings to `libtesseract` for other programming languages, please see the [wrapper](https://github.com/tesseract-ocr/tesseract/wiki/AddOns#tesseract-wrappers) section on AddOns wiki page. Documentation of Tesseract generated from source code by doxygen can be found on [tesseract-ocr.github.io](http://tesseract-ocr.github.io/). -#License +# License The code in this repository is licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -56,11 +56,11 @@ Documentation of Tesseract generated from source code by doxygen can be found on **NOTE**: This software depends on other packages that may be licensed under different open source licenses. -#Installing Tesseract +# Installing Tesseract You can either [Install Tesseract via pre-built binary package](https://github.com/tesseract-ocr/tesseract/wiki) or [build it from source](https://github.com/tesseract-ocr/tesseract/wiki/Compiling). -#Running Tesseract +# Running Tesseract Basic command line usage: @@ -68,7 +68,7 @@ Basic command line usage: For more information about the various command line options use `tesseract --help` or `man tesseract`. 
-#Support +# Support Mailing-lists: * [tesseract-ocr](https://groups.google.com/d/forum/tesseract-ocr) - For tesseract users. From f80c3b27dfdcf0a434d3ee7d93d8fe871e4e572b Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Wed, 22 Mar 2017 21:21:57 +0300 Subject: [PATCH 129/132] Fix windows build. --- ccstruct/fontinfo.h | 2 ++ classify/mastertrainer.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ccstruct/fontinfo.h b/ccstruct/fontinfo.h index 1835d72ec6..597a179c97 100644 --- a/ccstruct/fontinfo.h +++ b/ccstruct/fontinfo.h @@ -25,6 +25,8 @@ #include "host.h" #include "unichar.h" +#include <stdint.h> + template <typename T> class UnicityTable; namespace tesseract { diff --git a/classify/mastertrainer.h b/classify/mastertrainer.h index 0bdc760648..25119d2382 100644 --- a/classify/mastertrainer.h +++ b/classify/mastertrainer.h @@ -298,7 +298,7 @@ class TESS_API MasterTrainer { int debug_level_; // Feature map used to construct reduced feature spaces for compact // classifiers. - IntFeatureMap TESS_API feature_map_; + IntFeatureMap feature_map_; // Vector of Pix pointers used for classifiers that need the image. // Indexed by page_num_ in the samples. // These images are owned by the trainer and need to be pixDestroyed.
From 088186f48930caeca7fe5e8760aa2e621a73c657 Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Wed, 22 Mar 2017 21:45:09 +0300 Subject: [PATCH 130/132] Update appveyor.yml --- appveyor.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 909299f190..752a91845f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -5,7 +5,11 @@ platform: - Win64 configuration: - - Release + - Release + + # for curl + install: + - set PATH=C:\Program Files\Git\mingw64\bin;%PATH% before_build: - if %platform%==Win32 set generator=Visual Studio 14 2015 From d0951da9406afb151458863e3d23c2fade8d294d Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Thu, 23 Mar 2017 01:23:07 +0300 Subject: [PATCH 131/132] Update appveyor.yml --- appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/appveyor.yml b/appveyor.yml index 752a91845f..91cab93986 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -42,3 +42,4 @@ artifacts: - path: build\bin\Release #- path: build name: tesseract-$(APPVEYOR_BUILD_VERSION) + From 9c1034d4e4a240101326f867ecdff05ebaa6a4d5 Mon Sep 17 00:00:00 2001 From: Egor Pugin Date: Thu, 23 Mar 2017 08:21:48 +0300 Subject: [PATCH 132/132] Update appveyor.yml --- appveyor.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 91cab93986..c6aeee9221 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,4 @@ -os: Visual Studio 2015 +os: Visual Studio 2017 platform: - Win32 @@ -12,8 +12,8 @@ configuration: - set PATH=C:\Program Files\Git\mingw64\bin;%PATH% before_build: - - if %platform%==Win32 set generator=Visual Studio 14 2015 - - if %platform%==Win64 set generator=Visual Studio 14 2015 Win64 + - if %platform%==Win32 set generator=Visual Studio 15 2017 + - if %platform%==Win64 set generator=Visual Studio 15 2017 Win64 - if %platform%==Win32 set vcplatform=Win32 - if %platform%==Win64 set vcplatform=x64