From af037c27e7c9e7fdd0f9dea6dbfee599e57b6adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Mon, 2 Apr 2018 19:09:22 +0200 Subject: [PATCH 01/10] rename version.h.in because the filename is too general for distribution --- CMakeLists.txt | 4 ++-- api/{version.h.in => tess_version.h.in} | 0 configure.ac | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename api/{version.h.in => tess_version.h.in} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index d6a87f95a8..db6071b4a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -121,8 +121,8 @@ configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY) set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract") configure_file( - ${CMAKE_SOURCE_DIR}/api/version.h.in - ${CMAKE_BINARY_DIR}/api/version.h @ONLY) + ${CMAKE_SOURCE_DIR}/api/tess_version.h.in + ${CMAKE_BINARY_DIR}/api/tess_version.h @ONLY) configure_file( ${CMAKE_SOURCE_DIR}/vs2010/tesseract/tesseract.rc.in ${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc @ONLY) diff --git a/api/version.h.in b/api/tess_version.h.in similarity index 100% rename from api/version.h.in rename to api/tess_version.h.in diff --git a/configure.ac b/configure.ac index e62282e277..9ab6dcbf85 100644 --- a/configure.ac +++ b/configure.ac @@ -479,7 +479,7 @@ fi # Output files AC_CONFIG_FILES([Makefile tesseract.pc]) AC_CONFIG_FILES([api/Makefile]) -AC_CONFIG_FILES([api/version.h]) +AC_CONFIG_FILES([api/tess_version.h]) AC_CONFIG_FILES([arch/Makefile]) AC_CONFIG_FILES([ccmain/Makefile]) AC_CONFIG_FILES([opencl/Makefile]) From 64a73155ba8a281c03cb9ac41121e94a809f771a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Mon, 2 Apr 2018 19:11:46 +0200 Subject: [PATCH 02/10] add licence info --- api/tess_version.h.in | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/api/tess_version.h.in b/api/tess_version.h.in index fd1ad27608..2e229f92df 100644 --- a/api/tess_version.h.in +++
b/api/tess_version.h.in @@ -1,3 +1,23 @@ +/////////////////////////////////////////////////////////////////////// +// File: tess_version.h +// Description: Version information +// +// (C) Copyright 2018, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_API_VERSION_H_ +#define TESSERACT_API_VERSION_H_ + #define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@ #define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@ #define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@ @@ -6,3 +26,5 @@ TESSERACT_MINOR_VERSION << 8 | \ TESSERACT_MICRO_VERSION) #define TESSERACT_VERSION_STR "@PACKAGE_VERSION@" + +#endif // TESSERACT_API_VERSION_H_ From e9e1e93686bcaf605076057dca1f35e717da1692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zdenko=20Podobn=C3=BD?= Date: Mon, 2 Apr 2018 19:14:38 +0200 Subject: [PATCH 03/10] add tess_version.h to distribution --- CMakeLists.txt | 1 + api/Makefile.am | 4 ++-- api/baseapi.h | 1 + training/Makefile.am | 3 ++- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index db6071b4a6..628a5fd80c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -322,6 +322,7 @@ install(FILES api/baseapi.h api/capi.h api/renderer.h + ${CMAKE_CURRENT_BINARY_DIR}/api/tess_version.h #from arch/makefile.am arch/dotproductavx.h diff --git a/api/Makefile.am b/api/Makefile.am index 855b7443ea..2e33cebaf9 100644 --- a/api/Makefile.am
+++ b/api/Makefile.am @@ -5,7 +5,7 @@ AM_CPPFLAGS += -DLOCALEDIR=\"$(localedir)\"\ -I$(top_srcdir)/textord -I$(top_srcdir)/dict \ -I$(top_srcdir)/classify -I$(top_srcdir)/ccmain \ -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \ - -I$(top_srcdir)/opencl + -I$(top_srcdir)/opencl -I$(top_builddir)/api AM_CPPFLAGS += $(OPENCL_CPPFLAGS) @@ -13,7 +13,7 @@ if VISIBILITY AM_CPPFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden endif -include_HEADERS = apitypes.h baseapi.h capi.h renderer.h +include_HEADERS = apitypes.h baseapi.h capi.h renderer.h tess_version.h lib_LTLIBRARIES = noinst_LTLIBRARIES = libtesseract_api.la diff --git a/api/baseapi.h b/api/baseapi.h index 4263445e12..6a7a4e81ae 100644 --- a/api/baseapi.h +++ b/api/baseapi.h @@ -24,6 +24,7 @@ // To avoid collision with other typenames include the ABSOLUTE MINIMUM // complexity of includes here. Use forward declarations wherever possible // and hide includes of complex types in baseapi.cpp. +#include "tess_version.h" #include "apitypes.h" #include "pageiterator.h" #include "platform.h" diff --git a/training/Makefile.am b/training/Makefile.am index 0c80b18fb5..a37239f803 100644 --- a/training/Makefile.am +++ b/training/Makefile.am @@ -7,7 +7,8 @@ AM_CPPFLAGS += \ -I$(top_srcdir)/viewer \ -I$(top_srcdir)/textord -I$(top_srcdir)/dict \ -I$(top_srcdir)/classify -I$(top_srcdir)/display \ - -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil + -I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \ + -I$(top_builddir)/api EXTRA_DIST = language-specific.sh tesstrain.sh tesstrain_utils.sh From 10f4998aee3ccc68e9c4931ce744dd292ad6ff19 Mon Sep 17 00:00:00 2001 From: Zdenko Podobny Date: Mon, 2 Apr 2018 22:30:55 +0200 Subject: [PATCH 04/10] fix cmake build of training tools --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 628a5fd80c..23e78be3e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,6 +154,7 @@ include_directories(${Leptonica_INCLUDE_DIRS}) 
include_directories(${CMAKE_BINARY_DIR}) include_directories(api) +include_directories(${CMAKE_BINARY_DIR}/api) include_directories(arch) include_directories(ccmain) include_directories(ccstruct) From f9157fd91db9140adacc4d7cd9af5677e4f0163e Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 8 Apr 2018 14:44:14 +0200 Subject: [PATCH 05/10] configure: Don't use AM_MAINTAINER_MODE by default That macro disables automated updates when configure.ac or a Makefile.am changes. Normally those updates are wanted because users typically forget running ./autogen.sh. See also the GNU documentation why AM_MAINTAINER_MODE should not be used: https://www.gnu.org/software/automake/manual/html_node/maintainer_002dmode.html Signed-off-by: Stefan Weil --- configure.ac | 1 - doc/Makefile.am | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 9ab6dcbf85..c4a6ca6875 100644 --- a/configure.ac +++ b/configure.ac @@ -65,7 +65,6 @@ AC_SUBST([GENERIC_RELEASE]) AC_SUBST([GENERIC_VERSION]) AC_CONFIG_HEADERS([config_auto.h:config/config.h.in]) -AM_MAINTAINER_MODE # default conditional AM_CONDITIONAL([T_WIN], false) diff --git a/doc/Makefile.am b/doc/Makefile.am index d2e7b9333f..e016eff2d8 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,4 +1,4 @@ -if MAINTAINER_MODE +# doc/Makefile.am asciidoc=asciidoc -d manpage @@ -28,5 +28,3 @@ man_MANS = \ $(asciidoc) -o $@ $< MAINTAINERCLEANFILES = $(man_MANS) Doxyfile - -endif # MAINTAINER_MODE From 7cf2e2a4c871f21ae6bf22e7bc856bb012c32fcd Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Thu, 1 Feb 2018 08:19:57 +0100 Subject: [PATCH 06/10] Overload method ForwardTimeStep (CID 1385636 Explicit null dereferenced) This avoids NULL parameters and fixes a warning from Coverity Scan. 
Signed-off-by: Stefan Weil --- lstm/fullyconnected.cpp | 33 +++++++++++++++++++-------------- lstm/fullyconnected.h | 5 +++-- lstm/lstm.cpp | 4 ++-- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/lstm/fullyconnected.cpp b/lstm/fullyconnected.cpp index 52c0cbf36b..ea368ca223 100644 --- a/lstm/fullyconnected.cpp +++ b/lstm/fullyconnected.cpp @@ -147,15 +147,12 @@ void FullyConnected::Forward(bool debug, const NetworkIO& input, int thread_id = 0; #endif double* temp_line = temp_lines[thread_id]; - const double* d_input = nullptr; - const int8_t* i_input = nullptr; if (input.int_mode()) { - i_input = input.i(t); + ForwardTimeStep(input.i(t), t, temp_line); } else { input.ReadTimeStep(t, curr_input[thread_id]); - d_input = curr_input[thread_id]; + ForwardTimeStep(curr_input[thread_id], t, temp_line); } - ForwardTimeStep(d_input, i_input, t, temp_line); output->WriteTimeStep(t, temp_line); if (IsTraining() && type_ != NT_SOFTMAX) { acts_.CopyTimeStepFrom(t, *output, t); @@ -188,15 +185,7 @@ void FullyConnected::SetupForward(const NetworkIO& input, } } -void FullyConnected::ForwardTimeStep(const double* d_input, const int8_t* i_input, - int t, double* output_line) { - // input is copied to source_ line-by-line for cache coherency. - if (IsTraining() && external_source_ == nullptr && d_input != nullptr) - source_t_.WriteStrided(t, d_input); - if (d_input != nullptr) - weights_.MatrixDotVector(d_input, output_line); - else - weights_.MatrixDotVector(i_input, output_line); +void FullyConnected::ForwardTimeStep(int t, double* output_line) { if (type_ == NT_TANH) { FuncInplace(no_, output_line); } else if (type_ == NT_LOGISTIC) { @@ -214,6 +203,22 @@ void FullyConnected::ForwardTimeStep(const double* d_input, const int8_t* i_inpu } } +void FullyConnected::ForwardTimeStep(const double* d_input, + int t, double* output_line) { + // input is copied to source_ line-by-line for cache coherency. 
+ if (IsTraining() && external_source_ == NULL) + source_t_.WriteStrided(t, d_input); + weights_.MatrixDotVector(d_input, output_line); + ForwardTimeStep(t, output_line); +} + +void FullyConnected::ForwardTimeStep(const int8_t* i_input, + int t, double* output_line) { + // Unlike the double overload, integer input is not copied to source_t_. + weights_.MatrixDotVector(i_input, output_line); + ForwardTimeStep(t, output_line); +} + // Runs backward propagation of errors on the deltas line. // See NetworkCpp for a detailed discussion of the arguments. bool FullyConnected::Backward(bool debug, const NetworkIO& fwd_deltas, diff --git a/lstm/fullyconnected.h b/lstm/fullyconnected.h index 6b9b22a9eb..2c886f9fd0 100644 --- a/lstm/fullyconnected.h +++ b/lstm/fullyconnected.h @@ -91,8 +91,9 @@ class FullyConnected : public Network { // Components of Forward so FullyConnected can be reused inside LSTM. void SetupForward(const NetworkIO& input, const TransposedArray* input_transpose); - void ForwardTimeStep(const double* d_input, const int8_t* i_input, int t, - double* output_line); + void ForwardTimeStep(int t, double* output_line); + void ForwardTimeStep(const double* d_input, int t, double* output_line); + void ForwardTimeStep(const int8_t* i_input, int t, double* output_line); // Runs backward propagation of errors on the deltas line. // See Network for a detailed discussion of the arguments.
diff --git a/lstm/lstm.cpp b/lstm/lstm.cpp index 516ad0ffae..f4b81ba0a4 100644 --- a/lstm/lstm.cpp +++ b/lstm/lstm.cpp @@ -396,9 +396,9 @@ void LSTM::Forward(bool debug, const NetworkIO& input, if (softmax_ != nullptr) { if (input.int_mode()) { int_output->WriteTimeStepPart(0, 0, ns_, curr_output); - softmax_->ForwardTimeStep(nullptr, int_output->i(0), t, softmax_output); + softmax_->ForwardTimeStep(int_output->i(0), t, softmax_output); } else { - softmax_->ForwardTimeStep(curr_output, nullptr, t, softmax_output); + softmax_->ForwardTimeStep(curr_output, t, softmax_output); } output->WriteTimeStep(t, softmax_output); if (type_ == NT_LSTM_SOFTMAX_ENCODED) { From d9156f2d1910da6df69a4ca844527f31afada2a9 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 18 Feb 2018 11:11:53 +0100 Subject: [PATCH 07/10] WorkingPartSet: Remove unused constructor Signed-off-by: Stefan Weil --- textord/workingpartset.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/textord/workingpartset.h b/textord/workingpartset.h index 2cbf53a43d..7bc52ab4c3 100644 --- a/textord/workingpartset.h +++ b/textord/workingpartset.h @@ -31,8 +31,6 @@ namespace tesseract { // therefore only used during construction of the regions. class WorkingPartSet : public ELIST_LINK { public: - WorkingPartSet() { - } explicit WorkingPartSet(ColPartition* column) : column_(column), latest_part_(NULL), part_it_(&part_set_) { } From ef31eaa7d7037b0dcf4851108fa391967263244e Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Mon, 9 Apr 2018 19:06:59 +0200 Subject: [PATCH 08/10] Don't try to build manpages if asciidoc is missing Commit f9157fd91db9140adacc4d7cd9af5677e4f0163e changed the rules for the documentation, so make always tried to build it and failed if asciidoc was missing since that commit. Now configure tests whether asciidoc is available and builds the documentation conditionally. It also reports that to the user. 
Signed-off-by: Stefan Weil --- configure.ac | 21 +++++++++++++++++++++ doc/Makefile.am | 4 ++++ 2 files changed, 25 insertions(+) diff --git a/configure.ac b/configure.ac index c4a6ca6875..a9345eb28f 100644 --- a/configure.ac +++ b/configure.ac @@ -414,6 +414,17 @@ AC_SYS_LARGEFILE AC_CHECK_FUNCS([getline]) +# ---------------------------------------- +# Check for programs needed to build documentation. +# ---------------------------------------- + +AC_CHECK_PROG([have_asciidoc], asciidoc, true, false) +if $have_asciidoc; then + AM_CONDITIONAL([ASCIIDOC], true) +else + AM_CONDITIONAL([ASCIIDOC], false) +fi + # ---------------------------------------- # Checks for typedefs, structures, and compiler characteristics. # ---------------------------------------- @@ -513,6 +524,16 @@ echo "You can now build and install $PACKAGE_NAME by running:" echo "" echo "$ make" echo "$ sudo make install" +echo "" + +AM_COND_IF([ASCIIDOC], + [ + echo "This will also build the documentation." + ], [ + echo "Documentation will not be built because asciidoc is missing." 
+ ] +) + # echo "$ sudo make install LANGS=\"eng ara deu\"" # echo " Or:" # echo "$ sudo make install-langs" diff --git a/doc/Makefile.am b/doc/Makefile.am index e016eff2d8..0b9a251edd 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,5 +1,7 @@ # doc/Makefile.am +if ASCIIDOC + asciidoc=asciidoc -d manpage man_MANS = \ @@ -28,3 +30,5 @@ man_MANS = \ $(asciidoc) -o $@ $< MAINTAINERCLEANFILES = $(man_MANS) Doxyfile + +endif From 7a5033d1d98094fe82e2ea230fb8cc6e00dba396 Mon Sep 17 00:00:00 2001 From: FernandoGOT Date: Tue, 10 Apr 2018 10:16:37 -0300 Subject: [PATCH 09/10] added sleep 1 before generate_font_image to fix the problem of not finding fonts --- training/tesstrain_utils.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/training/tesstrain_utils.sh b/training/tesstrain_utils.sh index 3c980409db..c368f723c2 100755 --- a/training/tesstrain_utils.sh +++ b/training/tesstrain_utils.sh @@ -265,6 +265,7 @@ phase_I_generate_image() { local counter=0 for font in "${FONTS[@]}"; do + sleep 1 generate_font_image "${font}" & let counter=counter+1 let rem=counter%par_factor @@ -566,4 +567,3 @@ make__traineddata() { tlog "Moving ${TRAINING_DIR}/${LANG_CODE}.traineddata to ${OUTPUT_DIR}" cp -f ${TRAINING_DIR}/${LANG_CODE}.traineddata ${destfile} } - From 3917a192cabf1a5ef3a47fd9ac9aff1e05115ce9 Mon Sep 17 00:00:00 2001 From: FernandoGOT Date: Tue, 10 Apr 2018 14:22:33 -0300 Subject: [PATCH 10/10] fix for mktemp bug on MAC OS X --- training/tesstrain_utils.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/training/tesstrain_utils.sh b/training/tesstrain_utils.sh index c368f723c2..e24599ebde 100755 --- a/training/tesstrain_utils.sh +++ b/training/tesstrain_utils.sh @@ -192,7 +192,11 @@ parse_flags() { # Function initializes font config with a unique font cache dir. 
initialize_fontconfig() { - export FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX) + if [[ "$OSTYPE" == "darwin"* ]]; then + export FONT_CONFIG_CACHE=$(mktemp -d -t font_tmp.XXXXXXXXXX) + else + export FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX) + fi local sample_path=${FONT_CONFIG_CACHE}/sample_text.txt echo "Text" >${sample_path} run_command text2image --fonts_dir=${FONTS_DIR} \