Skip to content

Commit

Permalink
Integrated accumulated Symbol Choice in the Choice Iterator and made …
Browse files Browse the repository at this point in the history
…the api lstm_choice_mode independent

Signed-off-by: Noah Metzger <[email protected]>
noahmetzger committed Mar 12, 2019
1 parent bc2b919 commit 5b3e2fe
Showing 10 changed files with 174 additions and 91 deletions.
34 changes: 18 additions & 16 deletions src/api/hocrrenderer.cpp
Original file line number Diff line number Diff line change
@@ -213,13 +213,17 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
}

// Now, process the word...
std::vector<std::vector<std::pair<const char*, float>>>* confidencemap =
std::vector<std::vector<std::pair<const char*, float>>>* rawTimestepMap =
nullptr;
std::vector<std::vector<std::pair<const char*, float>>>* choiceMap =
nullptr;
std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
symbolMap = nullptr;
if (tesseract_->lstm_choice_mode) {
confidencemap = res_it->GetBestLSTMSymbolChoices();
symbolMap = res_it->GetBestSegmentedLSTMSymbolChoices();

choiceMap = res_it->GetBestLSTMSymbolChoices();
symbolMap = res_it->GetSegmentedLSTMTimesteps();
rawTimestepMap = res_it->GetRawLSTMTimesteps();
}
hocr_str << "\n <span class='ocrx_word'"
<< " id='"
@@ -285,14 +289,14 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (italic) hocr_str << "</em>";
if (bold) hocr_str << "</strong>";
// If the lstm choice mode is required it is added here
if (tesseract_->lstm_choice_mode == 1 && confidencemap != nullptr) {
for (size_t i = 0; i < confidencemap->size(); i++) {
if (tesseract_->lstm_choice_mode == 1 && rawTimestepMap != nullptr) {
for (size_t i = 0; i < rawTimestepMap->size(); i++) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "timestep_" << page_id << "_" << wcnt << "_" << tcnt << "'"
<< ">";
std::vector<std::pair<const char*, float>> timestep =
(*confidencemap)[i];
(*rawTimestepMap)[i];
for (std::pair<const char*, float> conf : timestep) {
hocr_str << "<span class='ocr_glyph'"
<< " id='"
@@ -304,17 +308,16 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
hocr_str << "</span>";
tcnt++;
}
} else if (tesseract_->lstm_choice_mode == 2 && confidencemap != nullptr) {
for (size_t i = 0; i < confidencemap->size(); i++) {
} else if (tesseract_->lstm_choice_mode == 2 && choiceMap != nullptr) {
for (size_t i = 0; i < choiceMap->size(); i++) {
std::vector<std::pair<const char*, float>> timestep =
(*confidencemap)[i];
(*choiceMap)[i];
if (timestep.size() > 0) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
<< "'"
<< " chosen='" << timestep[0].first << "'>";
for (size_t j = 1; j < timestep.size(); j++) {
<< "'>";
for (size_t j = 0; j < timestep.size(); j++) {
hocr_str << "<span class='ocr_glyph'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << gcnt
@@ -333,10 +336,9 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
(*symbolMap)[j];
hocr_str << "\n <span class='ocr_symbol'"
<< " id='"
<< "symbolstep_" << page_id << "_" << wcnt << "_" << scnt
<< "'>"
<< timesteps[0][0].first;
for (size_t i = 1; i < timesteps.size(); i++) {
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt
<< "'>";
for (size_t i = 0; i < timesteps.size(); i++) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "timestep_" << page_id << "_" << wcnt << "_" << tcnt
100 changes: 77 additions & 23 deletions src/ccmain/ltrresultiterator.cpp
Original file line number Diff line number Diff line change
@@ -358,7 +358,17 @@ bool LTRResultIterator::SymbolIsDropcap() const {
ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) {
ASSERT_HOST(result_it.it_->word() != nullptr);
word_res_ = result_it.it_->word();
oemLSTM_ = word_res_->tesseract->AnyLSTMLang();
oemLegacy_ = word_res_->tesseract->AnyTessLang();
BLOB_CHOICE_LIST* choices = nullptr;
tstep_index_ = &result_it.blob_index_;
if (oemLSTM_ && !oemLegacy_ && &word_res_->accumulated_timesteps != nullptr) {
if (word_res_->leadingSpace)
LSTM_choices_ = &word_res_->accumulated_timesteps[(*tstep_index_) + 1];
else
LSTM_choices_ = &word_res_->accumulated_timesteps[*tstep_index_];
filterSpaces();
}
if (word_res_->ratings != nullptr)
choices = word_res_->GetBlobChoices(result_it.blob_index_);
if (choices != nullptr && !choices->empty()) {
@@ -367,49 +377,93 @@ ChoiceIterator::ChoiceIterator(const LTRResultIterator& result_it) {
} else {
choice_it_ = nullptr;
}
if (&word_res_->symbol_steps != nullptr && !word_res_->symbol_steps.empty()) {
symbol_step_it_ = word_res_->symbol_steps.begin();
if (LSTM_choices_ != nullptr && !LSTM_choices_->empty()) {
LSTM_mode_ = true;
LSTM_choice_it_ = LSTM_choices_->begin();
}
}

ChoiceIterator::~ChoiceIterator() { delete choice_it_; }

// Moves to the next choice for the symbol and returns false if there
// are none left.
bool ChoiceIterator::Next() {
if (choice_it_ == nullptr) return false;
if (&word_res_->symbol_steps != nullptr) {
if (symbol_step_it_ == word_res_->symbol_steps.end()) {
symbol_step_it_ = word_res_->symbol_steps.begin();
if (LSTM_mode_) {
if (LSTM_choice_it_ != LSTM_choices_->end() &&
next(LSTM_choice_it_) == LSTM_choices_->end()) {
return false;
} else {
symbol_step_it_++;
}
++LSTM_choice_it_;

This comment has been minimized.

Copy link
@amitdo

amitdo Nov 8, 2019

Collaborator

CID 1400763 Using invalid iterator (INVALIDATE_ITERATOR)
7. increment_iterator: Incrementing iterator this->LSTM_choice_it_ though it is already past the end of its container.

This comment has been minimized.

Copy link
@stweil

stweil Nov 8, 2019

Member

@noahmetzger, would the following code be correct?

if (LSTM_choice_it_ == LSTM_choices_->end() ||
    next(LSTM_choice_it_) == LSTM_choices_->end()) {
  return false;
...

This comment has been minimized.

Copy link
@noahmetzger

noahmetzger Nov 8, 2019

Author Contributor

I think this is for the case when the list is empty. If I remember right, when you use next(LSTM_choice_it) and the list is already at the end, you'll get a null pointer. So you have to make sure it first checks that before using the next method. With || it would use it even when the list is already at the end.

This comment has been minimized.

Copy link
@amitdo

amitdo Nov 8, 2019

Collaborator

With || it would use it even when the list is already at the end

https://stackoverflow.com/a/7925696

This comment has been minimized.

Copy link
@noahmetzger

noahmetzger Nov 8, 2019

Author Contributor

my bad, you are right

This comment has been minimized.

Copy link
@amitdo

amitdo Nov 20, 2019

Collaborator

@stweil, a reminder...

return true;
}
} else {
if (choice_it_ == nullptr) return false;
choice_it_->forward();
return !choice_it_->cycled_list();
}
choice_it_->forward();
return !choice_it_->cycled_list();
}

// Returns the null terminated UTF-8 encoded text string for the current
// choice. Do NOT use delete [] to free after use.
const char* ChoiceIterator::GetUTF8Text() const {
if (choice_it_ == nullptr) return nullptr;
UNICHAR_ID id = choice_it_->data()->unichar_id();
return word_res_->uch_set->id_to_unichar_ext(id);
if (LSTM_mode_) {
std::pair<const char*, float> choice = *LSTM_choice_it_;
return choice.first;
} else {
if (choice_it_ == nullptr) return nullptr;
UNICHAR_ID id = choice_it_->data()->unichar_id();
return word_res_->uch_set->id_to_unichar_ext(id);
}
}

// Returns the confidence of the current choice.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
// Returns the confidence of the current choice depending on the used language
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
// choices for one symbol should roughly add up to 1.0f.
// If only traineddata of the legacy engine is used, the number should be
// interpreted as a percent probability. (0.0f-100.0f) In this case probabilities
// won't add up to 100. Each one stands on its own.
float ChoiceIterator::Confidence() const {
if (choice_it_ == nullptr) return 0.0f;
float confidence = 100 + 5 * choice_it_->data()->certainty();
if (confidence < 0.0f) confidence = 0.0f;
if (confidence > 100.0f) confidence = 100.0f;
return confidence;
if (LSTM_mode_) {
std::pair<const char*, float> choice = *LSTM_choice_it_;
return choice.second;
} else {
if (choice_it_ == nullptr) return 0.0f;
float confidence = 100 + 5 * choice_it_->data()->certainty();
if (confidence < 0.0f) confidence = 0.0f;
if (confidence > 100.0f) confidence = 100.0f;
return confidence;
}
}

// Returns the set of timesteps which belong to the current symbol
std::vector<std::vector<std::pair<const char*, float>>>*
ChoiceIterator::Timesteps() const {
if (&word_res_->symbol_steps == nullptr) return nullptr;
return &*symbol_step_it_;
if (&word_res_->symbol_steps == nullptr || !LSTM_mode_) return nullptr;

This comment has been minimized.

Copy link
@zdenop

zdenop Mar 13, 2019

Contributor

clang reports:

..\src\ccmain\ltrresultiterator.cpp(440,19):  warning: comparison of address of 'this->word_res_->symbol_steps' equal to a null pointer is always false [-Wtautological-pointer-compare]
  if (&word_res_->symbol_steps == nullptr || !LSTM_mode_) return nullptr;
       ~~~~~~~~~~~^~~~~~~~~~~~    ~~~~~~~

This comment has been minimized.

Copy link
@stweil

stweil Mar 13, 2019

Member

Fixed in commit ed84ba0. Thank you for reporting this.

if (word_res_->leadingSpace) {
return &word_res_->symbol_steps[*(tstep_index_) + 1];
} else {
return &word_res_->symbol_steps[*tstep_index_];
}
}

void ChoiceIterator::filterSpaces() {
if (LSTM_choices_->empty()) return;
std::vector<std::pair<const char*, float>>::iterator it =
LSTM_choices_->begin();
bool found_space = false;
float sum = 0;
for (it; it != LSTM_choices_->end();) {

This comment has been minimized.

Copy link
@zdenop

zdenop Mar 13, 2019

Contributor

clang reports:

..\src\ccmain\ltrresultiterator.cpp(454,8):  warning: expression result unused [-Wunused-value]
  for (it; it != LSTM_choices_->end();) {
       ^~

This comment has been minimized.

Copy link
@stweil

stweil Mar 13, 2019

Member

Thanks. Fixed in commit 4c2bbeb.

if (!strcmp(it->first, " ")) {
it = LSTM_choices_->erase(it);
found_space = true;
} else {
sum += it->second;
++it;
}
}
if (found_space) {
for (it = LSTM_choices_->begin(); it != LSTM_choices_->end(); ++it) {
it->second /= sum;
}
}
}
} // namespace tesseract.
25 changes: 18 additions & 7 deletions src/ccmain/ltrresultiterator.h
Original file line number Diff line number Diff line change
@@ -208,25 +208,36 @@ class ChoiceIterator {
// internal structure and should NOT be delete[]ed to free after use.
const char* GetUTF8Text() const;

// Returns the confidence of the current choice.
// The number should be interpreted as a percent probability. (0.0f-100.0f)
// Returns the confidence of the current choice depending on the used language
// data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
// choices for one symbol should roughly add up to 1.0f.
// If only traineddata of the legacy engine is used, the number should be
// interpreted as a percent probability. (0.0f-100.0f) In this case
// probabilities won't add up to 100. Each one stands on its own.
float Confidence() const;

// Returns a vector containing all timesteps, which belong to the currently
// selected symbol. A timestep is a vector containing pairs of symbols and
// floating point numbers. The number states the probability for the
// corresponding symbol.
std::vector<std::vector<std::pair<const char*, float>>>*
Timesteps() const;
std::vector<std::vector<std::pair<const char*, float>>>* Timesteps() const;

private:
// Removes the remaining spaces from the results and adapts the probabilities.
void filterSpaces();
// Pointer to the WERD_RES object owned by the API.
WERD_RES* word_res_;
// Iterator over the blob choices.
BLOB_CHOICE_IT* choice_it_;
//Iterator over the symbol steps.
std::vector<std::vector<std::vector<std::pair<const char*, float>>>>::iterator
symbol_step_it_;
std::vector<std::pair<const char*, float>>* LSTM_choices_ = nullptr;
std::vector<std::pair<const char*, float>>::iterator LSTM_choice_it_;

const int* tstep_index_;
bool LSTM_mode_ = false;
// true when there is LSTM engine related trained data
bool oemLSTM_;
// true when there is legacy engine related trained data
bool oemLegacy_;
};

} // namespace tesseract.
12 changes: 10 additions & 2 deletions src/ccmain/resultiterator.cpp
Original file line number Diff line number Diff line change
@@ -604,18 +604,26 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
strncpy(result, text.string(), length);
return result;
}
// Returns a pointer to the raw_timesteps member of the current word, or
// nullptr when the iterator has no current word.
std::vector<std::vector<std::pair<const char*, float>>>*
ResultIterator::GetRawLSTMTimesteps() const {
  if (it_->word() == nullptr) {
    return nullptr;
  }
  return &it_->word()->raw_timesteps;
}

std::vector<std::vector<std::pair<const char*, float>>>*
ResultIterator::GetBestLSTMSymbolChoices() const {
if (it_->word() != nullptr) {
return &it_->word()->timesteps;
return &it_->word()->accumulated_timesteps;
} else {
return nullptr;
}
}

std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
ResultIterator::GetBestSegmentedLSTMSymbolChoices() const {
ResultIterator::GetSegmentedLSTMTimesteps() const {
if (it_->word() != nullptr) {
return &it_->word()->symbol_steps;
} else {
4 changes: 3 additions & 1 deletion src/ccmain/resultiterator.h
Original file line number Diff line number Diff line change
@@ -100,10 +100,12 @@ class TESS_API ResultIterator : public LTRResultIterator {
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
*/
virtual std::vector<std::vector<std::pair<const char*, float>>>*
GetRawLSTMTimesteps() const;
virtual std::vector<std::vector<std::pair<const char*, float>>>*
GetBestLSTMSymbolChoices() const;
virtual std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
GetBestSegmentedLSTMSymbolChoices() const;
GetSegmentedLSTMTimesteps() const;

/**
* Return whether the current paragraph's dominant reading direction
9 changes: 5 additions & 4 deletions src/ccmain/tesseractclass.cpp
Original file line number Diff line number Diff line change
@@ -524,11 +524,12 @@ Tesseract::Tesseract()
this->params()),
INT_MEMBER(lstm_choice_mode, 0,
"Allows to include alternative symbols choices in the hOCR output. "
"Valid input values are 0, 1 and 2. 0 is the default value. "
"Valid input values are 0, 1, 2 and 3. 0 is the default value. "
"With 1 the alternative symbol choices per timestep are included. "
"With 2 the alternative symbol choices are accumulated per character."
"With 3 the alternative symbol choices per timestep are included and "
"separated by the suggested segmentation of Tesseract",
"With 2 the alternative symbol choices are accumulated per "
"character. "
"With 3 the alternative symbol choices per timestep are included "
"and separated by the suggested segmentation of Tesseract",
this->params()),

backup_config_file_(nullptr),
12 changes: 7 additions & 5 deletions src/ccmain/tesseractclass.h
Original file line number Diff line number Diff line change
@@ -1124,12 +1124,14 @@ class Tesseract : public Wordrec {
STRING_VAR_H(page_separator, "\f",
"Page separator (default is form feed control character)");
INT_VAR_H(lstm_choice_mode, 0,
"Allows to include alternative symbols choices in the hOCR output. "
"Valid input values are 0, 1 and 2. 0 is the default value. "
"Allows to include alternative symbols choices in the hOCR "
"output. "
"Valid input values are 0, 1, 2 and 3. 0 is the default value. "
"With 1 the alternative symbol choices per timestep are included. "
"With 2 the alternative symbol choices are accumulated per character."
"With 3 the alternative symbol choices per timestep are included and "
"separated by the suggested segmentation of Tesseract");
"With 2 the alternative symbol choices are accumulated per "
"character. "
"With 3 the alternative symbol choices per timestep are included "
"and separated by the suggested segmentation of Tesseract");

//// ambigsrecog.cpp /////////////////////////////////////////////////////////
FILE *init_recog_training(const STRING &fname);
5 changes: 4 additions & 1 deletion src/ccstruct/pageres.h
Original file line number Diff line number Diff line change
@@ -221,9 +221,12 @@ class WERD_RES : public ELIST_LINK {
// blob i and blob i+1.
GenericVector<int> blob_gaps;
// Stores the lstm choices of every timestep
std::vector<std::vector<std::pair<const char*, float>>> timesteps;
std::vector<std::vector<std::pair<const char*, float>>> raw_timesteps;
std::vector<std::vector<std::pair<const char*, float>>> accumulated_timesteps;
std::vector<std::vector<std::vector<std::pair<const char*, float>>>>
symbol_steps;
// Stores whether the timestep vector starts with a space
bool leadingSpace = false;
// Ratings matrix contains classifier choices for each classified combination
// of blobs. The dimension is the same as the number of blobs in chopped_word
// and the leading diagonal corresponds to classifier results of the blobs
Loading

0 comments on commit 5b3e2fe

Please sign in to comment.