Skip to content

Commit

Permalink
Fixed multilang for LSTM, pushed cube to one side without actually deleting it
Browse files Browse the repository at this point in the history
  • Loading branch information
theraysmith committed Dec 5, 2016
1 parent 798d79a commit 5deebe6
Show file tree
Hide file tree
Showing 14 changed files with 139 additions and 124 deletions.
7 changes: 3 additions & 4 deletions api/tesseractmain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,9 @@ void PrintHelpForOEM() {
const char* msg =
"OCR Engine modes:\n"
" 0 Original Tesseract only.\n"
" 1 Cube only.\n"

This comment has been minimized.

Copy link
@tfmorris

tfmorris Mar 24, 2017

Contributor

Isn't this a breaking change to reshuffle the mode numbers?

This comment has been minimized.

Copy link
@amitdo

amitdo Mar 24, 2017

Collaborator

Yes, it is.

This comment has been minimized.

Copy link
@theraysmith

theraysmith via email Mar 29, 2017

Author Contributor
" 2 Tesseract + cube.\n"
" 3 Default, based on what is available.\n"
" 4 Neural nets (LSTM) only.\n";
" 1 Neural nets LSTM only.\n"
" 2 Tesseract + LSTM.\n"
" 3 Default, based on what is available.\n";

printf("%s", msg);
}
Expand Down
61 changes: 32 additions & 29 deletions ccmain/control.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,22 @@
#include <errno.h>
#endif
#include <ctype.h>
#include "ocrclass.h"
#include "werdit.h"
#include "callcpp.h"
#include "control.h"
#include "docqual.h"
#include "drawfx.h"
#include "tessbox.h"
#include "tessvars.h"
#include "pgedit.h"
#include "reject.h"
#include "fixspace.h"
#include "docqual.h"
#include "control.h"
#include "output.h"
#include "callcpp.h"
#include "globals.h"
#include "lstmrecognizer.h"
#include "ocrclass.h"
#include "output.h"
#include "pgedit.h"
#include "reject.h"
#include "sorthelper.h"
#include "tessbox.h"
#include "tesseractclass.h"
#include "tessvars.h"
#include "werdit.h"

#define MIN_FONT_ROW_COUNT 8
#define MAX_XHEIGHT_DIFF 3
Expand Down Expand Up @@ -192,8 +193,8 @@ void Tesseract::SetupWordPassN(int pass_n, WordData* word) {
WERD_RES* word_res = new WERD_RES;
word_res->InitForRetryRecognition(*word->word);
word->lang_words.push_back(word_res);
// Cube doesn't get setup for pass2.
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_CUBE_ONLY) {
// LSTM doesn't get setup for pass2.
if (pass_n == 1 || lang_t->tessedit_ocr_engine_mode != OEM_LSTM_ONLY) {
word_res->SetupForRecognition(
lang_t->unicharset, lang_t, BestPix(),
lang_t->tessedit_ocr_engine_mode, NULL,
Expand Down Expand Up @@ -301,16 +302,6 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
const TBOX* target_word_box,
const char* word_config,
int dopasses) {
// PSM_RAW_LINE is a special-case mode in which the layout analysis is
// completely ignored and LSTM is run on the raw image. There is no hope
// of running normal tesseract in this situation or of integrating output.
#ifndef ANDROID_BUILD
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY &&
tessedit_pageseg_mode == PSM_RAW_LINE) {
RecogRawLine(page_res);
return true;
}
#endif
PAGE_RES_IT page_res_it(page_res);

if (tessedit_minimal_rej_pass1) {
Expand Down Expand Up @@ -397,8 +388,7 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
if (!RecogAllWordsPassN(2, monitor, &page_res_it, &words)) return false;
}

// The next passes can only be run if tesseract has been used, as cube
// doesn't set all the necessary outputs in WERD_RES.
// The next passes are only required for Tess-only.
if (AnyTessLang() && !AnyLSTMLang()) {
// ****************** Pass 3 *******************
// Fix fuzzy spaces.
Expand Down Expand Up @@ -451,8 +441,13 @@ bool Tesseract::recog_all_words(PAGE_RES* page_res,
for (page_res_it.restart_page(); page_res_it.word() != NULL;
page_res_it.forward()) {
WERD_RES* word = page_res_it.word();
if (word->best_choice == NULL || word->best_choice->length() == 0)
POLY_BLOCK* pb = page_res_it.block()->block != NULL
? page_res_it.block()->block->poly_block()
: NULL;
if (word->best_choice == NULL || word->best_choice->length() == 0 ||
(word->best_choice->IsAllSpaces() && (pb == NULL || pb->IsText()))) {
page_res_it.DeleteCurrentWord();
}
}

if (monitor != NULL) {
Expand Down Expand Up @@ -1376,12 +1371,20 @@ void Tesseract::classify_word_pass1(const WordData& word_data,
cube_word_pass1(block, row, *in_word);
return;
}
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
if (!(*in_word)->odd_size) {
#endif
#ifndef ANDROID_BUILD
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
if (!(*in_word)->odd_size || tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
LSTMRecognizeWord(*block, row, *in_word, out_words);
if (!out_words->empty())
return; // Successful lstm recognition.
}
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
// No fallback allowed, so use a fake.
(*in_word)->SetupFake(lstm_recognizer_->GetUnicharset());
return;
}
// Fall back to tesseract for failed words or odd words.
(*in_word)->SetupForRecognition(unicharset, this, BestPix(),
OEM_TESSERACT_ONLY, NULL,
Expand Down Expand Up @@ -1523,7 +1526,7 @@ void Tesseract::classify_word_pass2(const WordData& word_data,
WERD_RES** in_word,
PointerVector<WERD_RES>* out_words) {
// Return if we do not want to run Tesseract.
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
return;
}
ROW* row = word_data.row;
Expand Down Expand Up @@ -1908,7 +1911,7 @@ static void find_modal_font( //good chars in word
* Get the fonts for the word.
*/
void Tesseract::set_word_fonts(WERD_RES *word) {
// Don't try to set the word fonts for a cube word, as the configs
// Don't try to set the word fonts for an lstm word, as the configs
// will be meaningless.
if (word->chopped_word == NULL) return;
ASSERT_HOST(word->best_choice != NULL);
Expand Down
40 changes: 17 additions & 23 deletions ccmain/linerec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,19 +219,6 @@ ImageData* Tesseract::GetRectImage(const TBOX& box, const BLOCK& block,
}

#ifndef ANDROID_BUILD
// Top-level function recognizes a single raw line.
void Tesseract::RecogRawLine(PAGE_RES* page_res) {
PAGE_RES_IT it(page_res);
PointerVector<WERD_RES> words;
LSTMRecognizeWord(*it.block()->block, it.row()->row, it.word(), &words);
if (getDict().stopper_debug_level >= 1) {
for (int w = 0; w < words.size(); ++w) {
words[w]->DebugWordChoices(true, NULL);
}
}
it.ReplaceCurrentWord(&words);
}

// Recognizes a word or group of words, converting to WERD_RES in *words.
// Analogous to classify_word_pass1, but can handle a group of words as well.
void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
Expand Down Expand Up @@ -268,7 +255,17 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
// for each of the output words.
// If we drop a word as junk, then there is always a space in front of the
// next.
bool deleted_prev = false;
const Dict* stopper_dict = lstm_recognizer_->GetDict();
if (stopper_dict == nullptr) stopper_dict = &getDict();
bool any_nonspace_delimited = false;
for (int w = 0; w < words->size(); ++w) {
WERD_RES* word = (*words)[w];
if (word->best_choice != nullptr &&
word->best_choice->ContainsAnyNonSpaceDelimited()) {
any_nonspace_delimited = true;
break;
}
}
for (int w = 0; w < words->size(); ++w) {
WERD_RES* word = (*words)[w];
if (word->best_choice == NULL) {
Expand All @@ -284,9 +281,7 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
}
if (word->best_choice == NULL) {
// It is a dud.
words->remove(w);
--w;
deleted_prev = true;
word->SetupFake(lstm_recognizer_->GetUnicharset());
} else {
// Set the best state.
for (int i = 0; i < word->best_choice->length(); ++i) {
Expand Down Expand Up @@ -314,22 +309,21 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
word->best_choice->print();
}
// Discard words that are impossibly bad, but allow a bit more for
// dictionary words.
// dictionary words, and keep bad words in non-space-delimited langs.
if (word_certainty >= RecodeBeamSearch::kMinCertainty ||
any_nonspace_delimited ||
(word_certainty >= kWorstDictCertainty &&
Dict::valid_word_permuter(word->best_choice->permuter(), true))) {
word->best_choice->set_certainty(word_certainty);
if (deleted_prev) word->word->set_blanks(1);
word->tess_accepted = stopper_dict->AcceptableResult(word);
} else {
if (getDict().stopper_debug_level >= 1) {
tprintf("Deleting word with certainty %g\n", word_certainty);
word->best_choice->print();
}
// It is a dud.
words->remove(w);
--w;
deleted_prev = true;
word->SetupFake(lstm_recognizer_->GetUnicharset());
}
word->best_choice->set_certainty(word_certainty);
}
}
}
Expand Down
75 changes: 41 additions & 34 deletions ccmain/tessedit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ bool Tesseract::init_tesseract_lang_data(
// Determine which ocr engine(s) should be loaded and used for recognition.
if (oem != OEM_DEFAULT) tessedit_ocr_engine_mode.set_value(oem);
if (tessdata_manager_debug_level) {
tprintf("Loading Tesseract/Cube with tessedit_ocr_engine_mode %d\n",
tprintf("Loading Tesseract/LSTM with tessedit_ocr_engine_mode %d\n",
static_cast<int>(tessedit_ocr_engine_mode));
}

Expand All @@ -174,9 +174,37 @@ bool Tesseract::init_tesseract_lang_data(
return true;
}

// The various OcrEngineMode settings (see publictypes.h) determine which
// engine-specific data files need to be loaded. Currently everything needs
// the base tesseract data, which supplies other useful information, but
// alternative engines, such as LSTM are optional.
#ifndef ANDROID_BUILD
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY ||
tessedit_ocr_engine_mode == OEM_TESSERACT_LSTM_COMBINED) {
if (tessdata_manager.swap()) {
tprintf("Error: LSTM requested on big-endian hardware!!\n");
tprintf("Big-endian not yet supported! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
} else if (tessdata_manager.SeekToStart(TESSDATA_LSTM)) {
lstm_recognizer_ = new LSTMRecognizer;
TFile fp;
fp.Open(tessdata_manager.GetDataFilePtr(), -1);
ASSERT_HOST(lstm_recognizer_->DeSerialize(tessdata_manager.swap(), &fp));
if (lstm_use_matrix)
lstm_recognizer_->LoadDictionary(tessdata_path.string(), language);
} else {
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
}
}
#endif

// Load the unicharset
if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
// Avoid requiring a unicharset when we aren't running base tesseract.
unicharset.CopyFrom(lstm_recognizer_->GetUnicharset());
} else if (!tessdata_manager.SeekToStart(TESSDATA_UNICHARSET) ||
!unicharset.load_from_file(tessdata_manager.GetDataFilePtr())) {
return false;
}
if (unicharset.size() > MAX_NUM_CLASSES) {
Expand All @@ -203,11 +231,6 @@ bool Tesseract::init_tesseract_lang_data(
ambigs_debug_level, use_ambigs_for_adaption, &unicharset);
if (tessdata_manager_debug_level) tprintf("Loaded ambigs\n");
}

// The various OcrEngineMode settings (see publictypes.h) determine which
// engine-specific data files need to be loaded. Currently everything needs
// the base tesseract data, which supplies other useful information, but
// alternative engines, such as cube and LSTM are optional.
#ifndef NO_CUBE_BUILD
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY) {
ASSERT_HOST(init_cube_objects(false, &tessdata_manager));
Expand All @@ -217,22 +240,6 @@ bool Tesseract::init_tesseract_lang_data(
ASSERT_HOST(init_cube_objects(true, &tessdata_manager));
if (tessdata_manager_debug_level)
tprintf("Loaded Cube with combiner\n");
} else if (tessedit_ocr_engine_mode == OEM_LSTM_ONLY) {
if (tessdata_manager.swap()) {
tprintf("Error: LSTM requested on big-endian hardware!!\n");
tprintf("Big-endian not yet supported! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
} else if (tessdata_manager.SeekToStart(TESSDATA_LSTM)) {
lstm_recognizer_ = new LSTMRecognizer;
TFile fp;
fp.Open(tessdata_manager.GetDataFilePtr(), -1);
ASSERT_HOST(lstm_recognizer_->DeSerialize(tessdata_manager.swap(), &fp));
if (lstm_use_matrix)
lstm_recognizer_->LoadDictionary(tessdata_path.string(), language);
} else {
tprintf("Error: LSTM requested, but not present!! Loading tesseract.\n");
tessedit_ocr_engine_mode.set_value(OEM_TESSERACT_ONLY);
}
}
#endif
// Init ParamsModel.
Expand Down Expand Up @@ -425,16 +432,16 @@ int Tesseract::init_tesseract_internal(
tessdata_manager.End();
return 0;
}
// If only Cube will be used, skip loading Tesseract classifier's
// pre-trained templates.
bool init_tesseract_classifier =
tessedit_ocr_engine_mode != OEM_CUBE_ONLY;
// If only Cube will be used and if it has its own Unicharset,
// skip initializing permuter and loading Tesseract Dawgs.
bool init_dict =
!(tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET));
program_editup(textbase, init_tesseract_classifier, init_dict);
// If only LSTM will be used, skip loading Tesseract classifier's
// pre-trained templates and dictionary.
bool init_tesseract = tessedit_ocr_engine_mode != OEM_LSTM_ONLY &&
tessedit_ocr_engine_mode != OEM_CUBE_ONLY;
bool init_dict = init_tesseract;
if (tessedit_ocr_engine_mode == OEM_CUBE_ONLY &&
!tessdata_manager.SeekToStart(TESSDATA_CUBE_UNICHARSET)) {
init_dict = true;
}
program_editup(textbase, init_tesseract, init_dict);
tessdata_manager.End();
return 0; //Normal exit
}
Expand Down
12 changes: 6 additions & 6 deletions ccmain/tesseract_cube_combiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
// the recognition results of Tesseract and Cube at the word level

#include <algorithm>
#include <string>
#include <vector>
#include <wctype.h>

#include "tesseract_cube_combiner.h"
Expand Down Expand Up @@ -125,12 +127,10 @@ bool TesseractCubeCombiner::ValidWord(const string &str) {
// Public method for computing the combiner features. The agreement
// output parameter will be true if both answers are identical,
// and false otherwise.
bool TesseractCubeCombiner::ComputeCombinerFeatures(const string &tess_str,
int tess_confidence,
CubeObject *cube_obj,
WordAltList *cube_alt_list,
vector<double> *features,
bool *agreement) {
bool TesseractCubeCombiner::ComputeCombinerFeatures(
const string &tess_str, int tess_confidence, CubeObject *cube_obj,
WordAltList *cube_alt_list, std::vector<double> *features,
bool *agreement) {
features->clear();
*agreement = false;
if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0)
Expand Down
4 changes: 2 additions & 2 deletions ccmain/tesseractclass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ Tesseract::Tesseract()
" (Values from PageSegMode enum in publictypes.h)",
this->params()),
INT_INIT_MEMBER(tessedit_ocr_engine_mode, tesseract::OEM_TESSERACT_ONLY,
"Which OCR engine(s) to run (Tesseract, Cube, both)."
"Which OCR engine(s) to run (Tesseract, LSTM, both)."
" Defaults to loading and running only Tesseract"
" (no Cube,no combiner)."
" (no LSTM,no combiner)."
" Values from OcrEngineMode enum in tesseractclass.h)",
this->params()),
STRING_MEMBER(tessedit_char_blacklist, "",
Expand Down
Loading

0 comments on commit 5deebe6

Please sign in to comment.