diff --git a/src/api/baseapi.cpp b/src/api/baseapi.cpp
index 296e2a9866..93c02a0f5f 100644
--- a/src/api/baseapi.cpp
+++ b/src/api/baseapi.cpp
@@ -1606,12 +1606,11 @@ char* TessBaseAPI::GetHOCRText(ETEXT_DESC* monitor, int page_number) {
if (italic) hocr_str += "";
if (bold) hocr_str += "";
// If glyph confidence is required it is added here
- if (tesseract_->glyph_confidences && confidencemap != nullptr) {
+ if (tesseract_->glyph_confidences == 1 && confidencemap != nullptr) {
for (size_t i = 0; i < confidencemap->size(); i++) {
hocr_str += "\n ";
- //*
std::vector> timestep = (*confidencemap)[i];
for (std::pair conf : timestep) {
hocr_str += "";
gcnt++;
}
- //*/
hocr_str += "";
tcnt++;
}
+ } else if (tesseract_->glyph_confidences == 2 && confidencemap != nullptr) {
+ for (size_t i = 0; i < confidencemap->size(); i++) {
+ std::vector> timestep = (*confidencemap)[i];
+ if (timestep.size() > 0) {
+ hocr_str += "\n ";
+ for (size_t j = 1; j < timestep.size(); j++) {
+ hocr_str += "";
+ hocr_str += timestep[j].first;
+ hocr_str += "";
+ gcnt++;
+ }
+ hocr_str += "";
+ tcnt++;
+ }
+ }
}
hocr_str += "";
tcnt = 1;
diff --git a/src/ccmain/tesseractclass.cpp b/src/ccmain/tesseractclass.cpp
index 72138233bf..7b5d4beaef 100644
--- a/src/ccmain/tesseractclass.cpp
+++ b/src/ccmain/tesseractclass.cpp
@@ -508,7 +508,7 @@ Tesseract::Tesseract()
STRING_MEMBER(page_separator, "\f",
"Page separator (default is form feed control character)",
this->params()),
- BOOL_MEMBER(glyph_confidences, false,
+ INT_MEMBER(glyph_confidences, 0,
"Allows to include glyph confidences in the hOCR output",
this->params()),
diff --git a/src/ccmain/tesseractclass.h b/src/ccmain/tesseractclass.h
index c3fe7124b1..45fbc6c971 100644
--- a/src/ccmain/tesseractclass.h
+++ b/src/ccmain/tesseractclass.h
@@ -1114,7 +1114,8 @@ class Tesseract : public Wordrec {
"Preserve multiple interword spaces");
STRING_VAR_H(page_separator, "\f",
"Page separator (default is form feed control character)");
- BOOL_VAR_H(glyph_confidences, false, "Allows to include glyph confidences in the hOCR output");
+ INT_VAR_H(glyph_confidences, 0,
+ "Allows to include glyph confidences in the hOCR output");
//// ambigsrecog.cpp /////////////////////////////////////////////////////////
FILE *init_recog_training(const STRING &fname);
diff --git a/src/lstm/lstmrecognizer.cpp b/src/lstm/lstmrecognizer.cpp
index 7ef79d2457..62ca990051 100644
--- a/src/lstm/lstmrecognizer.cpp
+++ b/src/lstm/lstmrecognizer.cpp
@@ -172,7 +172,8 @@ bool LSTMRecognizer::LoadDictionary(const char* lang, TessdataManager* mgr) {
void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
bool debug, double worst_dict_cert,
const TBOX& line_box,
- PointerVector* words, bool glyph_confidences) {
+ PointerVector* words,
+ int glyph_confidences) {
NetworkIO outputs;
float scale_factor;
NetworkIO inputs;
diff --git a/src/lstm/lstmrecognizer.h b/src/lstm/lstmrecognizer.h
index 0755db9ac3..dcfbc2b5c2 100644
--- a/src/lstm/lstmrecognizer.h
+++ b/src/lstm/lstmrecognizer.h
@@ -185,7 +185,7 @@ class LSTMRecognizer {
void RecognizeLine(const ImageData& image_data, bool invert, bool debug,
double worst_dict_cert, const TBOX& line_box,
PointerVector* words,
- bool glyph_confidences = false);
+ int glyph_confidences = 0);
// Helper computes min and mean best results in the output.
void OutputStats(const NetworkIO& outputs,
diff --git a/src/lstm/recodebeam.cpp b/src/lstm/recodebeam.cpp
index 682484f1e7..ddad441c2c 100644
--- a/src/lstm/recodebeam.cpp
+++ b/src/lstm/recodebeam.cpp
@@ -22,6 +22,8 @@
#include "networkio.h"
#include "pageres.h"
#include "unicharcompress.h"
+#include
+#include