Skip to content

Commit

Permalink
Merge branch 'master' of github.com-egorpugin:tesseract-ocr/tesseract
Browse files Browse the repository at this point in the history
  • Loading branch information
egorpugin committed Apr 10, 2018
2 parents 7dd2ecd + 9e1ecdb commit 742a087
Show file tree
Hide file tree
Showing 13 changed files with 95 additions and 39 deletions.
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ configure_file(${AUTOCONFIG_SRC} ${AUTOCONFIG} @ONLY)
set(INCLUDE_DIR "${CMAKE_INSTALL_PREFIX}/include" "${CMAKE_INSTALL_PREFIX}/include/tesseract")

configure_file(
${CMAKE_SOURCE_DIR}/api/version.h.in
${CMAKE_BINARY_DIR}/api/version.h @ONLY)
${CMAKE_SOURCE_DIR}/api/tess_version.h.in
${CMAKE_BINARY_DIR}/api/tess_version.h @ONLY)
configure_file(
${CMAKE_SOURCE_DIR}/vs2010/tesseract/tesseract.rc.in
${CMAKE_BINARY_DIR}/vs2010/tesseract/tesseract.rc @ONLY)
Expand Down Expand Up @@ -160,6 +160,7 @@ include_directories(${Leptonica_INCLUDE_DIRS})
include_directories(${CMAKE_BINARY_DIR})

include_directories(api)
include_directories(${CMAKE_BINARY_DIR}/api)
include_directories(arch)
include_directories(ccmain)
include_directories(ccstruct)
Expand Down Expand Up @@ -328,6 +329,7 @@ install(FILES
api/baseapi.h
api/capi.h
api/renderer.h
${CMAKE_CURRENT_BINARY_DIR}/api/tess_version.h

#from arch/makefile.am
arch/dotproductavx.h
Expand Down
4 changes: 2 additions & 2 deletions api/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ AM_CPPFLAGS += -DLOCALEDIR=\"$(localedir)\"\
-I$(top_srcdir)/textord -I$(top_srcdir)/dict \
-I$(top_srcdir)/classify -I$(top_srcdir)/ccmain \
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
-I$(top_srcdir)/opencl
-I$(top_srcdir)/opencl -I$(top_builddir)/api

AM_CPPFLAGS += $(OPENCL_CPPFLAGS)

if VISIBILITY
AM_CPPFLAGS += -fvisibility=hidden -fvisibility-inlines-hidden
endif

include_HEADERS = apitypes.h baseapi.h capi.h renderer.h
include_HEADERS = apitypes.h baseapi.h capi.h renderer.h tess_version.h
lib_LTLIBRARIES =

noinst_LTLIBRARIES = libtesseract_api.la
Expand Down
1 change: 1 addition & 0 deletions api/baseapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
// and hide includes of complex types in baseapi.cpp.
#include "tess_version.h"
#include "apitypes.h"
#include "pageiterator.h"
#include "platform.h"
Expand Down
30 changes: 30 additions & 0 deletions api/tess_version.h.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
///////////////////////////////////////////////////////////////////////
// File: tess_version.h
// Description: Version information
//
// (C) Copyright 2018, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

// This is a template: the build system (CMake configure_file or the
// autoconf output-file step) replaces the placeholders below to produce
// tess_version.h. Do not edit the generated header by hand.

#ifndef TESSERACT_API_VERSION_H_
#define TESSERACT_API_VERSION_H_

// Individual numeric components of the version, filled in at configure time.
#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
// All three components packed into one integer: major shifted left by 16
// bits, minor shifted left by 8 bits, micro in the low bits.
// NOTE(review): the packing only keeps the fields separate while minor and
// micro are each below 256 -- confirm that holds for the version scheme.
#define TESSERACT_VERSION \
(TESSERACT_MAJOR_VERSION << 16 | \
TESSERACT_MINOR_VERSION << 8 | \
TESSERACT_MICRO_VERSION)
// Full package version as a string literal, filled in at configure time.
#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"

#endif // TESSERACT_API_VERSION_H_
8 changes: 0 additions & 8 deletions api/version.h.in

This file was deleted.

24 changes: 22 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ AC_SUBST([GENERIC_RELEASE])
AC_SUBST([GENERIC_VERSION])

AC_CONFIG_HEADERS([config_auto.h:config/config.h.in])
AM_MAINTAINER_MODE

# default conditional
AM_CONDITIONAL([T_WIN], false)
Expand Down Expand Up @@ -415,6 +414,17 @@ AC_SYS_LARGEFILE
AC_CHECK_FUNCS([getline])
# ----------------------------------------
# Check for programs needed to build documentation.
# ----------------------------------------
# Look for the asciidoc program on PATH; have_asciidoc becomes "true" when
# it is found and "false" otherwise.
AC_CHECK_PROG([have_asciidoc], [asciidoc], [true], [false])
# Define the ASCIIDOC automake conditional from that result. AM_CONDITIONAL
# is invoked exactly once and unconditionally, and the value is compared with
# "test" instead of being executed as a shell command.
AM_CONDITIONAL([ASCIIDOC], [test "x$have_asciidoc" = xtrue])
# ----------------------------------------
# Checks for typedefs, structures, and compiler characteristics.
# ----------------------------------------
Expand Down Expand Up @@ -479,7 +489,7 @@ fi
# Output files
AC_CONFIG_FILES([Makefile tesseract.pc])
AC_CONFIG_FILES([api/Makefile])
AC_CONFIG_FILES([api/version.h])
AC_CONFIG_FILES([api/tess_version.h])
AC_CONFIG_FILES([arch/Makefile])
AC_CONFIG_FILES([ccmain/Makefile])
AC_CONFIG_FILES([opencl/Makefile])
Expand Down Expand Up @@ -514,6 +524,16 @@ echo "You can now build and install $PACKAGE_NAME by running:"
echo ""
echo "$ make"
echo "$ sudo make install"
echo ""
AM_COND_IF([ASCIIDOC],
[
echo "This will also build the documentation."
], [
echo "Documentation will not be built because asciidoc is missing."
]
)
# echo "$ sudo make install LANGS=\"eng ara deu\""
# echo " Or:"
# echo "$ sudo make install-langs"
Expand Down
6 changes: 4 additions & 2 deletions doc/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
if MAINTAINER_MODE
# doc/Makefile.am

if ASCIIDOC

asciidoc=asciidoc -d manpage

Expand Down Expand Up @@ -29,4 +31,4 @@ man_MANS = \

MAINTAINERCLEANFILES = $(man_MANS) Doxyfile

endif # MAINTAINER_MODE
endif
33 changes: 19 additions & 14 deletions lstm/fullyconnected.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,15 +147,12 @@ void FullyConnected::Forward(bool debug, const NetworkIO& input,
int thread_id = 0;
#endif
double* temp_line = temp_lines[thread_id];
const double* d_input = nullptr;
const int8_t* i_input = nullptr;
if (input.int_mode()) {
i_input = input.i(t);
ForwardTimeStep(input.i(t), t, temp_line);
} else {
input.ReadTimeStep(t, curr_input[thread_id]);
d_input = curr_input[thread_id];
ForwardTimeStep(curr_input[thread_id], t, temp_line);
}
ForwardTimeStep(d_input, i_input, t, temp_line);
output->WriteTimeStep(t, temp_line);
if (IsTraining() && type_ != NT_SOFTMAX) {
acts_.CopyTimeStepFrom(t, *output, t);
Expand Down Expand Up @@ -188,15 +185,7 @@ void FullyConnected::SetupForward(const NetworkIO& input,
}
}

void FullyConnected::ForwardTimeStep(const double* d_input, const int8_t* i_input,
int t, double* output_line) {
// input is copied to source_ line-by-line for cache coherency.
if (IsTraining() && external_source_ == nullptr && d_input != nullptr)
source_t_.WriteStrided(t, d_input);
if (d_input != nullptr)
weights_.MatrixDotVector(d_input, output_line);
else
weights_.MatrixDotVector(i_input, output_line);
void FullyConnected::ForwardTimeStep(int t, double* output_line) {
if (type_ == NT_TANH) {
FuncInplace<GFunc>(no_, output_line);
} else if (type_ == NT_LOGISTIC) {
Expand All @@ -214,6 +203,22 @@ void FullyConnected::ForwardTimeStep(const double* d_input, const int8_t* i_inpu
}
}

// Runs the forward step for timestep t on floating-point input.
//   d_input:     input values for timestep t.
//   t:           index of the timestep being processed.
//   output_line: receives the result of weights_.MatrixDotVector and is then
//                passed to ForwardTimeStep(t, output_line) for the remaining
//                per-timestep processing.
void FullyConnected::ForwardTimeStep(const double* d_input,
                                     int t, double* output_line) {
  // input is copied to source_ line-by-line for cache coherency.
  // The copy is only made while training and when no external source is set.
  if (IsTraining() && external_source_ == nullptr)
    source_t_.WriteStrided(t, d_input);
  weights_.MatrixDotVector(d_input, output_line);
  ForwardTimeStep(t, output_line);
}

// Runs the forward step for timestep t on 8-bit integer input.
//   i_input:     input values for timestep t.
//   t:           index of the timestep being processed.
//   output_line: receives the result of weights_.MatrixDotVector and is then
//                passed to ForwardTimeStep(t, output_line).
void FullyConnected::ForwardTimeStep(const int8_t* i_input,
int t, double* output_line) {
// Unlike the double overload, this path does not write the input into
// source_t_; it only multiplies and hands off to the shared overload.
weights_.MatrixDotVector(i_input, output_line);
ForwardTimeStep(t, output_line);
}

// Runs backward propagation of errors on the deltas line.
// See NetworkCpp for a detailed discussion of the arguments.
bool FullyConnected::Backward(bool debug, const NetworkIO& fwd_deltas,
Expand Down
5 changes: 3 additions & 2 deletions lstm/fullyconnected.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ class FullyConnected : public Network {
// Components of Forward so FullyConnected can be reused inside LSTM.
void SetupForward(const NetworkIO& input,
const TransposedArray* input_transpose);
void ForwardTimeStep(const double* d_input, const int8_t* i_input, int t,
double* output_line);
void ForwardTimeStep(int t, double* output_line);
void ForwardTimeStep(const double* d_input, int t, double* output_line);
void ForwardTimeStep(const int8_t* i_input, int t, double* output_line);

// Runs backward propagation of errors on the deltas line.
// See Network for a detailed discussion of the arguments.
Expand Down
4 changes: 2 additions & 2 deletions lstm/lstm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,9 +396,9 @@ void LSTM::Forward(bool debug, const NetworkIO& input,
if (softmax_ != nullptr) {
if (input.int_mode()) {
int_output->WriteTimeStepPart(0, 0, ns_, curr_output);
softmax_->ForwardTimeStep(nullptr, int_output->i(0), t, softmax_output);
softmax_->ForwardTimeStep(int_output->i(0), t, softmax_output);
} else {
softmax_->ForwardTimeStep(curr_output, nullptr, t, softmax_output);
softmax_->ForwardTimeStep(curr_output, t, softmax_output);
}
output->WriteTimeStep(t, softmax_output);
if (type_ == NT_LSTM_SOFTMAX_ENCODED) {
Expand Down
2 changes: 0 additions & 2 deletions textord/workingpartset.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ namespace tesseract {
// therefore only used during construction of the regions.
class WorkingPartSet : public ELIST_LINK {
public:
WorkingPartSet() {
}
explicit WorkingPartSet(ColPartition* column)
: column_(column), latest_part_(NULL), part_it_(&part_set_) {
}
Expand Down
3 changes: 2 additions & 1 deletion training/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ AM_CPPFLAGS += \
-I$(top_srcdir)/viewer \
-I$(top_srcdir)/textord -I$(top_srcdir)/dict \
-I$(top_srcdir)/classify -I$(top_srcdir)/display \
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil
-I$(top_srcdir)/wordrec -I$(top_srcdir)/cutil \
-I$(top_builddir)/api

EXTRA_DIST = language-specific.sh tesstrain.sh tesstrain_utils.sh

Expand Down
8 changes: 6 additions & 2 deletions training/tesstrain_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,11 @@ parse_flags() {

# Function initializes font config with a unique font cache dir.
initialize_fontconfig() {
export FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX)
if [[ "$OSTYPE" == "darwin"* ]]; then
export FONT_CONFIG_CACHE=$(mktemp -d -t font_tmp.XXXXXXXXXX)
else
export FONT_CONFIG_CACHE=$(mktemp -d --tmpdir font_tmp.XXXXXXXXXX)
fi
local sample_path=${FONT_CONFIG_CACHE}/sample_text.txt
echo "Text" >${sample_path}
run_command text2image --fonts_dir=${FONTS_DIR} \
Expand Down Expand Up @@ -265,6 +269,7 @@ phase_I_generate_image() {

local counter=0
for font in "${FONTS[@]}"; do
sleep 1
generate_font_image "${font}" &
let counter=counter+1
let rem=counter%par_factor
Expand Down Expand Up @@ -566,4 +571,3 @@ make__traineddata() {
tlog "Moving ${TRAINING_DIR}/${LANG_CODE}.traineddata to ${OUTPUT_DIR}"
cp -f ${TRAINING_DIR}/${LANG_CODE}.traineddata ${destfile}
}

0 comments on commit 742a087

Please sign in to comment.