Skip to content

Commit

Permalink
Deleted some dead LSTM code, making everything use the recoder
Browse files Browse the repository at this point in the history
  • Loading branch information
theraysmith committed Jul 14, 2017
1 parent aee910a commit 3ec11bd
Show file tree
Hide file tree
Showing 9 changed files with 43 additions and 511 deletions.
1 change: 1 addition & 0 deletions api/apitypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define TESSERACT_API_APITYPES_H_

#include "publictypes.h"
#include "version.h"

// The types used by the API and Page/ResultIterator can be found in:
// ccstruct/publictypes.h
Expand Down
5 changes: 0 additions & 5 deletions api/baseapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@
#ifndef TESSERACT_API_BASEAPI_H_
#define TESSERACT_API_BASEAPI_H_

// Human-readable version string for this release.
#define TESSERACT_VERSION_STR "4.00.00alpha"
// Numeric version, laid out as produced by MAKE_VERSION below:
// 0x040000 == MAKE_VERSION(4, 0, 0).
#define TESSERACT_VERSION 0x040000
// Packs a (major, minor, patch) triple into a single integer: major is
// shifted left by 16 bits, minor by 8 bits, and patch is OR-ed into the low
// bits. Each argument is parenthesized so expressions may be passed safely.
#define MAKE_VERSION(major, minor, patch) (((major) << 16) | ((minor) << 8) | \
(patch))

#include <stdio.h>
// To avoid collision with other typenames include the ABSOLUTE MINIMUM
// complexity of includes here. Use forward declarations wherever possible
Expand Down
21 changes: 1 addition & 20 deletions ccmain/linerec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@

namespace tesseract {

// Arbitrary penalty for non-dictionary words.
// TODO(rays) How to learn this?
const float kNonDictionaryPenalty = 5.0f;
// Scale factor to make certainty more comparable to Tesseract.
const float kCertaintyScale = 7.0f;
// Worst acceptable certainty for a dictionary word.
Expand Down Expand Up @@ -241,8 +238,7 @@ void Tesseract::LSTMRecognizeWord(const BLOCK& block, ROW *row, WERD_RES *word,
if (im_data == NULL) return;
lstm_recognizer_->RecognizeLine(*im_data, true, classify_debug_level > 0,
kWorstDictCertainty / kCertaintyScale,
lstm_use_matrix, &unicharset, word_box, 2.0,
false, words);
word_box, words);
delete im_data;
SearchWords(words);
}
Expand All @@ -268,17 +264,6 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
}
for (int w = 0; w < words->size(); ++w) {
WERD_RES* word = (*words)[w];
if (word->best_choice == NULL) {
// If we are using the beam search, the unicharset had better match!
word->SetupWordScript(unicharset);
WordSearch(word);
} else if (word->best_choice->unicharset() == &unicharset &&
!lstm_recognizer_->IsRecoding()) {
// We set up the word without using the dictionary, so set the permuter
// now, but we can only do it because the unicharsets match.
word->best_choice->set_permuter(
getDict().valid_word(*word->best_choice, true));
}
if (word->best_choice == NULL) {
// It is a dud.
word->SetupFake(lstm_recognizer_->GetUnicharset());
Expand All @@ -297,10 +282,6 @@ void Tesseract::SearchWords(PointerVector<WERD_RES>* words) {
float word_certainty = MIN(word->space_certainty,
word->best_choice->certainty());
word_certainty *= kCertaintyScale;
// Arbitrary ding factor for non-dictionary words.
if (!lstm_recognizer_->IsRecoding() &&
!Dict::valid_word_permuter(word->best_choice->permuter(), true))
word_certainty -= kNonDictionaryPenalty;
if (getDict().stopper_debug_level >= 1) {
tprintf("Best choice certainty=%g, space=%g, scaled=%g, final=%g\n",
word->best_choice->certainty(), word->space_certainty,
Expand Down
5 changes: 5 additions & 0 deletions ccutil/unicharcompress.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,11 @@ void UnicharCompress::SetupPassThrough(const UNICHARSET& unicharset) {
code.Set(0, u);
codes.push_back(code);
}
if (!unicharset.has_special_codes()) {
RecodedCharID code;
code.Set(0, unicharset.size());
codes.push_back(code);
}
SetupDirect(codes);
}

Expand Down
Loading

0 comments on commit 3ec11bd

Please sign in to comment.