Skip to content

Commit

Permalink
Added the option to get the timesteps separated by the suggested segm…
Browse files Browse the repository at this point in the history
…entation

Signed-off-by: Noah Metzger <[email protected]>
  • Loading branch information
noahmetzger committed Mar 11, 2019
1 parent d2c3309 commit 754e38d
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 15 deletions.
37 changes: 36 additions & 1 deletion src/api/hocrrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0))
return nullptr;

int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, tcnt = 1, gcnt = 1;
int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1, scnt = 1, tcnt = 1, gcnt = 1;
int page_id = page_number + 1; // hOCR uses 1-based page numbers.
bool para_is_ltr = true; // Default direction is LTR
const char* paragraph_lang = nullptr;
Expand Down Expand Up @@ -215,8 +215,11 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
// Now, process the word...
std::vector<std::vector<std::pair<const char*, float>>>* confidencemap =
nullptr;
std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
symbolMap = nullptr;
if (tesseract_->lstm_choice_mode) {
confidencemap = res_it->GetBestLSTMSymbolChoices();
symbolMap = res_it->GetBestSegmentedLSTMSymbolChoices();
}
hocr_str << "\n <span class='ocrx_word'"
<< " id='"
Expand Down Expand Up @@ -324,6 +327,38 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
tcnt++;
}
}
} else if (tesseract_->lstm_choice_mode == 3 && symbolMap != nullptr) {
for (size_t j = 0; j < symbolMap->size(); j++) {
std::vector<std::vector<std::pair<const char*, float>>> timesteps =
(*symbolMap)[j];
hocr_str << "\n <span class='ocr_symbol'"
<< " id='"
<< "symbolstep_" << page_id << "_" << wcnt << "_" << scnt
<< "'>"
<< timesteps[0][0].first;
for (size_t i = 1; i < timesteps.size(); i++) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "timestep_" << page_id << "_" << wcnt << "_" << tcnt
<< "'"
<< ">";
std::vector<std::pair<const char*, float>> timestep =
timesteps[i];
for (std::pair<const char*, float> conf : timestep) {
hocr_str << "<span class='ocr_glyph'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << gcnt
<< "'"
<< " title='x_confs " << int(conf.second * 100) << "'>"
<< conf.first << "</span>";
gcnt++;
}
hocr_str << "</span>";
tcnt++;
}
hocr_str << "</span>";
scnt++;
}
}
hocr_str << "</span>";
tcnt = 1;
Expand Down
12 changes: 11 additions & 1 deletion src/ccmain/resultiterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -605,14 +605,24 @@ char* ResultIterator::GetUTF8Text(PageIteratorLevel level) const {
return result;
}

std::vector<std::vector<std::pair<const char*, float>>>* ResultIterator::GetBestLSTMSymbolChoices() const {
// Gives access to the LSTM choices recorded per timestep for the current word.
// Returns the address of the word's own `timesteps` member (no copy is made),
// or nullptr when the iterator currently has no word.
std::vector<std::vector<std::pair<const char*, float>>>*
ResultIterator::GetBestLSTMSymbolChoices() const {
  // Guard first: without a current word there is nothing to point at.
  if (it_->word() == nullptr) return nullptr;
  return &it_->word()->timesteps;
}

// Gives access to the LSTM timestep choices of the current word as stored in
// its `symbol_steps` member (outer vector: one entry per stored symbol group,
// each holding a list of timesteps, each holding (text, value) pairs).
// Returns the address of that member (no copy is made), or nullptr when the
// iterator currently has no word.
std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
ResultIterator::GetBestSegmentedLSTMSymbolChoices() const {
  // Guard first: without a current word there is nothing to point at.
  if (it_->word() == nullptr) return nullptr;
  return &it_->word()->symbol_steps;
}

void ResultIterator::AppendUTF8WordText(STRING *text) const {
if (!it_->word()) return;
ASSERT_HOST(it_->word()->best_choice != nullptr);
Expand Down
5 changes: 4 additions & 1 deletion src/ccmain/resultiterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,10 @@ class TESS_API ResultIterator : public LTRResultIterator {
/**
* Returns the LSTM choices for every LSTM timestep for the current word.
*/
virtual std::vector<std::vector<std::pair<const char*, float>>>* GetBestLSTMSymbolChoices() const;
virtual std::vector<std::vector<std::pair<const char*, float>>>*
GetBestLSTMSymbolChoices() const;
virtual std::vector<std::vector<std::vector<std::pair<const char*, float>>>>*
GetBestSegmentedLSTMSymbolChoices() const;

/**
* Return whether the current paragraph's dominant reading direction
Expand Down
4 changes: 3 additions & 1 deletion src/ccmain/tesseractclass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,9 @@ Tesseract::Tesseract()
"Allows to include alternative symbols choices in the hOCR output. "
"Valid input values are 0, 1 and 2. 0 is the default value. "
"With 1 the alternative symbol choices per timestep are included. "
"With 2 the alternative symbol choices are accumulated per character.",
"With 2 the alternative symbol choices are accumulated per character."
"With 3 the alternative symbol choices per timestep are included and "
"separated by the suggested segmentation of Tesseract",
this->params()),

backup_config_file_(nullptr),
Expand Down
4 changes: 3 additions & 1 deletion src/ccmain/tesseractclass.h
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,9 @@ class Tesseract : public Wordrec {
"Allows to include alternative symbols choices in the hOCR output. "
"Valid input values are 0, 1 and 2. 0 is the default value. "
"With 1 the alternative symbol choices per timestep are included. "
"With 2 the alternative symbol choices are accumulated per character.");
"With 2 the alternative symbol choices are accumulated per character."
"With 3 the alternative symbol choices per timestep are included and "
"separated by the suggested segmentation of Tesseract");

//// ambigsrecog.cpp /////////////////////////////////////////////////////////
FILE *init_recog_training(const STRING &fname);
Expand Down
2 changes: 2 additions & 0 deletions src/ccstruct/pageres.h
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ class WERD_RES : public ELIST_LINK {
GenericVector<int> blob_gaps;
// Stores the lstm choices of every timestep
std::vector<std::vector<std::pair<const char*, float>>> timesteps;
std::vector<std::vector<std::vector<std::pair<const char*, float>>>>
symbol_steps;
// Ratings matrix contains classifier choices for each classified combination
// of blobs. The dimension is the same as the number of blobs in chopped_word
// and the leading diagonal corresponds to classifier results of the blobs
Expand Down
46 changes: 37 additions & 9 deletions src/lstm/recodebeam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <deque>
#include <map>
#include <set>
#include <tuple>
#include <vector>

#include <algorithm>
Expand Down Expand Up @@ -187,7 +188,7 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
GenericVector<int> xcoords;
GenericVector<const RecodeNode*> best_nodes;
GenericVector<const RecodeNode*> second_nodes;
std::deque<std::pair<int,int>> best_choices;
std::deque<std::tuple<int, int, double>> best_choices;
ExtractBestPaths(&best_nodes, &second_nodes);
if (debug) {
DebugPath(unicharset, best_nodes);
Expand All @@ -201,13 +202,14 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
int timestepEnd = 0;
// If lstm choice mode is required in granularity level 2 or 3, it stores the
// x coordinates of every chosen character to match the alternative choices to it.
if (lstm_choice_mode == 2) {
if (lstm_choice_mode == 2 || lstm_choice_mode == 3) {
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
&xcoords, &best_choices);
if (best_choices.size() > 0) {
current_char = best_choices.front().first;
timestepEnd = best_choices.front().second;
best_choices.pop_front();
current_char = std::get<0>(best_choices.front());
timestepEnd = std::get<1>(best_choices.front());
if(lstm_choice_mode == 2)
best_choices.pop_front();
}
} else {
ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
Expand Down Expand Up @@ -258,7 +260,7 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
choice_pairs.push_back(choice);
}
}
if ((best_choices.size() > 0 && i == best_choices.front().second - 1)
if ((best_choices.size() > 0 && i == std::get<1>(best_choices.front()) - 1)
|| i == xcoords[word_end]-1) {
std::map<const char*, float> summed_propabilities;
for (auto it = choice_pairs.begin(); it != choice_pairs.end(); ++it) {
Expand All @@ -283,7 +285,7 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
it->second));
}
if (best_choices.size() > 0) {
current_char = best_choices.front().first;
current_char = std::get<0>(best_choices.front());
best_choices.pop_front();
}
choice_pairs.clear();
Expand All @@ -292,6 +294,25 @@ void RecodeBeamSearch::ExtractBestPathAsWords(const TBOX& line_box,
}
}
timestepEnd = xcoords[word_end];
} else if (lstm_choice_mode == 3) {
std::vector<std::vector<std::pair<const char*, float>>> currentSymbol;
for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
if (i == std::get<1>(best_choices.front())) {
if (currentSymbol.size() > 0) {
word_res->symbol_steps.push_back(currentSymbol);
currentSymbol.clear();
}
std::vector<std::pair<const char*, float>> choice_Header;
choice_Header.push_back(std::pair<const char*, float>(
unicharset->id_to_unichar_ext(std::get<0>(best_choices.front())),
2.0));
currentSymbol.push_back(choice_Header);
if(best_choices.size()>1) best_choices.pop_front();
}
currentSymbol.push_back(timesteps[i]);
}
word_res->symbol_steps.push_back(currentSymbol);
timestepEnd = xcoords[word_end];
}
for (int i = word_start; i < word_end; ++i) {
BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST;
Expand Down Expand Up @@ -366,7 +387,7 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
const GenericVector<const RecodeNode*>& best_nodes,
GenericVector<int>* unichar_ids, GenericVector<float>* certs,
GenericVector<float>* ratings, GenericVector<int>* xcoords,
std::deque<std::pair<int, int>>* best_choices) {
std::deque<std::tuple<int, int, double>>* best_choices) {
unichar_ids->truncate(0);
certs->truncate(0);
ratings->truncate(0);
Expand All @@ -375,6 +396,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
int t = 0;
int width = best_nodes.size();
while (t < width) {
int id;
int tposition;
double certainty = 0.0;
double rating = 0.0;
while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) {
Expand All @@ -396,7 +419,8 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
unichar_ids->push_back(unichar_id);
xcoords->push_back(t);
if (best_choices != nullptr) {
best_choices->push_back(std::pair<int, int>(unichar_id, t));
tposition = t;
id = unichar_id;
}
do {
double cert = best_nodes[t++]->certainty;
Expand All @@ -414,6 +438,10 @@ void RecodeBeamSearch::ExtractPathAsUnicharIds(
if (certainty < certs->back()) certs->back() = certainty;
ratings->back() += rating;
}
if (best_choices != nullptr) {
best_choices->push_back(
std::tuple<int, int, double>(id, tposition, rating));
}
}
xcoords->push_back(width);
}
Expand Down
3 changes: 2 additions & 1 deletion src/lstm/recodebeam.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "unicharcompress.h"
#include <deque>
#include <set>
#include <tuple>
#include <vector>

namespace tesseract {
Expand Down Expand Up @@ -281,7 +282,7 @@ class RecodeBeamSearch {
const GenericVector<const RecodeNode*>& best_nodes,
GenericVector<int>* unichar_ids, GenericVector<float>* certs,
GenericVector<float>* ratings, GenericVector<int>* xcoords,
std::deque<std::pair<int,int>>* best_choices = nullptr);
std::deque<std::tuple<int,int,double>>* best_choices = nullptr);

// Sets up a word with the ratings matrix and fake blobs with boxes in the
// right places.
Expand Down

0 comments on commit 754e38d

Please sign in to comment.