Skip to content

Commit

Permalink
Optimize performance with clang-tidy
Browse files Browse the repository at this point in the history
The code was partially formatted with clang-format and optimized with

    clang-tidy --checks="-*,perfor*" --fix src/*/*.cpp

Signed-off-by: Stefan Weil <[email protected]>
  • Loading branch information
stweil committed Nov 14, 2021
1 parent e5011c5 commit d8d63fd
Show file tree
Hide file tree
Showing 53 changed files with 2,092 additions and 1,376 deletions.
104 changes: 63 additions & 41 deletions src/api/hocrrenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
tesseract::WritingDirection writing_direction;
tesseract::TextlineOrder textline_order;
float deskew_angle;
it->Orientation(&orientation, &writing_direction, &textline_order, &deskew_angle);
it->Orientation(&orientation, &writing_direction, &textline_order,
&deskew_angle);
return orientation;
}

Expand All @@ -49,7 +50,8 @@ static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) {
* method currently only inserts a 'textangle' property to indicate the rotation
* direction and does not add any baseline information to the hocr string.
*/
static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel level,
static void AddBaselineCoordsTohOCR(const PageIterator *it,
PageIteratorLevel level,
std::stringstream &hocr_str) {
tesseract::Orientation orientation = GetBlockTextOrientation(it);
if (orientation != ORIENTATION_PAGE_UP) {
Expand Down Expand Up @@ -82,7 +84,8 @@ static void AddBaselineCoordsTohOCR(const PageIterator *it, PageIteratorLevel le
double p1 = (y2 - y1) / static_cast<double>(x2 - x1);
double p0 = y1 - p1 * x1;

hocr_str << "; baseline " << round(p1 * 1000.0) / 1000.0 << " " << round(p0 * 1000.0) / 1000.0;
hocr_str << "; baseline " << round(p1 * 1000.0) / 1000.0 << " "
<< round(p0 * 1000.0) / 1000.0;
}

static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level,
Expand All @@ -91,16 +94,17 @@ static void AddBoxTohOCR(const ResultIterator *it, PageIteratorLevel level,
it->BoundingBox(level, &left, &top, &right, &bottom);
// This is the only place we use double quotes instead of single quotes,
// but it may be too late to change for consistency
hocr_str << " title=\"bbox " << left << " " << top << " " << right << " " << bottom;
hocr_str << " title=\"bbox " << left << " " << top << " " << right << " "
<< bottom;
// Add baseline coordinates & heights for textlines only.
if (level == RIL_TEXTLINE) {
AddBaselineCoordsTohOCR(it, level, hocr_str);
// add custom height measures
float row_height, descenders, ascenders; // row attributes
it->RowAttributes(&row_height, &descenders, &ascenders);
// TODO(rays): Do we want to limit these to a single decimal place?
hocr_str << "; x_size " << row_height << "; x_descenders " << -descenders << "; x_ascenders "
<< ascenders;
hocr_str << "; x_size " << row_height << "; x_descenders " << -descenders
<< "; x_ascenders " << ascenders;
}
hocr_str << "\">";
}
Expand Down Expand Up @@ -128,7 +132,8 @@ char *TessBaseAPI::GetHOCRText(int page_number) {
* Returned string must be freed with the delete [] operator.
*/
char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(monitor) < 0)) {
if (tesseract_ == nullptr ||
(page_res_ == nullptr && Recognize(monitor) < 0)) {
return nullptr;
}

Expand All @@ -147,13 +152,16 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {

#ifdef _WIN32
// convert input name from ANSI encoding to utf-8
int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
int str16_len =
MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, nullptr, 0);
wchar_t *uni16_str = new WCHAR[str16_len];
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str, str16_len);
int utf8_len =
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr, 0, nullptr, nullptr);
str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_.c_str(), -1, uni16_str,
str16_len);
int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, nullptr,
0, nullptr, nullptr);
char *utf8_str = new char[utf8_len];
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len, nullptr, nullptr);
WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, utf8_len,
nullptr, nullptr);
input_file_ = utf8_str;
delete[] uni16_str;
delete[] utf8_str;
Expand All @@ -174,8 +182,8 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
hocr_str << "unknown";
}

hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " " << rect_width_ << " "
<< rect_height_ << "; ppageno " << page_number
hocr_str << "\"; bbox " << rect_left_ << " " << rect_top_ << " "
<< rect_width_ << " " << rect_height_ << "; ppageno " << page_number
<< "; scan_res " << GetSourceYResolution() << " "
<< GetSourceYResolution() << "'>\n";

Expand Down Expand Up @@ -230,7 +238,8 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {

// Now, process the word...
int32_t lstm_choice_mode = tesseract_->lstm_choice_mode;
std::vector<std::vector<std::vector<std::pair<const char *, float>>>> *rawTimestepMap = nullptr;
std::vector<std::vector<std::vector<std::pair<const char *, float>>>>
*rawTimestepMap = nullptr;
std::vector<std::vector<std::pair<const char *, float>>> *CTCMap = nullptr;
if (lstm_choice_mode) {
CTCMap = res_it->GetBestLSTMSymbolChoices();
Expand All @@ -244,10 +253,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
int pointsize, font_id;
const char *font_name;
res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom);
font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace, &serif,
&smallcaps, &pointsize, &font_id);
hocr_str << " title='bbox " << left << " " << top << " " << right << " " << bottom
<< "; x_wconf " << static_cast<int>(res_it->Confidence(RIL_WORD));
font_name =
res_it->WordFontAttributes(&bold, &italic, &underlined, &monospace,
&serif, &smallcaps, &pointsize, &font_id);
hocr_str << " title='bbox " << left << " " << top << " " << right << " "
<< bottom << "; x_wconf "
<< static_cast<int>(res_it->Confidence(RIL_WORD));
if (font_info) {
if (font_name) {
hocr_str << "; x_font " << HOcrEscape(font_name).c_str();
Expand Down Expand Up @@ -287,31 +298,36 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
hocr_str << "<em>";
}
do {
const std::unique_ptr<const char[]> grapheme(res_it->GetUTF8Text(RIL_SYMBOL));
const std::unique_ptr<const char[]> grapheme(
res_it->GetUTF8Text(RIL_SYMBOL));
if (grapheme && grapheme[0] != 0) {
if (hocr_boxes) {
res_it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom);
hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes " << left << " " << top
<< " " << right << " " << bottom << "; x_conf " << res_it->Confidence(RIL_SYMBOL)
<< "'>";
hocr_str << "\n <span class='ocrx_cinfo' title='x_bboxes "
<< left << " " << top << " " << right << " " << bottom
<< "; x_conf " << res_it->Confidence(RIL_SYMBOL) << "'>";
}
hocr_str << HOcrEscape(grapheme.get()).c_str();
if (hocr_boxes) {
hocr_str << "</span>";
tesseract::ChoiceIterator ci(*res_it);
if (lstm_choice_mode == 1 && ci.Timesteps() != nullptr) {
std::vector<std::vector<std::pair<const char *, float>>> *symbol = ci.Timesteps();
std::vector<std::vector<std::pair<const char *, float>>> *symbol =
ci.Timesteps();
hocr_str << "\n <span class='ocr_symbol'"
<< " id='"
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt << "'>";
for (auto timestep : *symbol) {
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt
<< "'>";
for (const auto &timestep : *symbol) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt << "'>";
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
for (auto conf : timestep) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << int(conf.second * 100) << "'>"
<< HOcrEscape(conf.first).c_str() << "</span>";
++ccnt;
Expand All @@ -324,16 +340,18 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
} else if (lstm_choice_mode == 2) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt << "'>";
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
do {
const char *choice = ci.GetUTF8Text();
float choiceconf = ci.Confidence();
if (choice != nullptr) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< " title='x_confs " << choiceconf << "'>" << HOcrEscape(choice).c_str()
<< "</span>";
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << choiceconf << "'>"
<< HOcrEscape(choice).c_str() << "</span>";
ccnt++;
}
} while (ci.Next());
Expand All @@ -352,18 +370,20 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
}
// If the lstm choice mode is required it is added here
if (lstm_choice_mode == 1 && !hocr_boxes && rawTimestepMap != nullptr) {
for (auto symbol : *rawTimestepMap) {
for (const auto &symbol : *rawTimestepMap) {
hocr_str << "\n <span class='ocr_symbol'"
<< " id='"
<< "symbol_" << page_id << "_" << wcnt << "_" << scnt << "'>";
for (auto timestep : symbol) {
for (const auto &timestep : symbol) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt << "'>";
<< "timestep" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
for (auto conf : timestep) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << int(conf.second * 100) << "'>"
<< HOcrEscape(conf.first).c_str() << "</span>";
++ccnt;
Expand All @@ -375,11 +395,12 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
++scnt;
}
} else if (lstm_choice_mode == 2 && !hocr_boxes && CTCMap != nullptr) {
for (auto timestep : *CTCMap) {
for (const auto &timestep : *CTCMap) {
if (timestep.size() > 0) {
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt << "'>";
<< "lstm_choices_" << page_id << "_" << wcnt << "_" << tcnt
<< "'>";
for (auto &j : timestep) {
float conf = 100 - tesseract_->lstm_rating_coefficient * j.second;
if (conf < 0.0f) {
Expand All @@ -390,9 +411,10 @@ char *TessBaseAPI::GetHOCRText(ETEXT_DESC *monitor, int page_number) {
}
hocr_str << "\n <span class='ocrx_cinfo'"
<< " id='"
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt << "'"
<< " title='x_confs " << conf << "'>" << HOcrEscape(j.first).c_str()
<< "</span>";
<< "choice_" << page_id << "_" << wcnt << "_" << ccnt
<< "'"
<< " title='x_confs " << conf << "'>"
<< HOcrEscape(j.first).c_str() << "</span>";
ccnt++;
}
hocr_str << "</span>";
Expand Down
33 changes: 17 additions & 16 deletions src/ccmain/equationdetect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <limits>
#include <memory>

Expand Down Expand Up @@ -189,11 +190,11 @@ void EquationDetect::IdentifySpecialText(BLOBNBOX *blobnbox, const int height_th
const float kConfScoreTh = -5.0f, kConfDiffTh = 1.8;
// The scores here are negative, so the max/min == fabs(min/max).
// float ratio = fmax(lang_score, equ_score) / fmin(lang_score, equ_score);
const float diff = fabs(lang_score - equ_score);
const float diff = std::fabs(lang_score - equ_score);
BlobSpecialTextType type = BSTT_NONE;

// Classification.
if (fmax(lang_score, equ_score) < kConfScoreTh) {
if (std::fmax(lang_score, equ_score) < kConfScoreTh) {
// If both scores are very small, then mark it as unclear.
type = BSTT_UNCLEAR;
} else if (diff > kConfDiffTh && equ_score > lang_score) {
Expand Down Expand Up @@ -727,7 +728,7 @@ int EquationDetect::CountAlignment(const std::vector<int> &sorted_vec, const int
if (sorted_vec.empty()) {
return 0;
}
const int kDistTh = static_cast<int>(round(0.03f * resolution_));
const int kDistTh = static_cast<int>(std::round(0.03f * resolution_));
auto pos = std::upper_bound(sorted_vec.begin(), sorted_vec.end(), val);
if (pos > sorted_vec.begin()) {
--pos;
Expand Down Expand Up @@ -772,7 +773,7 @@ void EquationDetect::IdentifyInlinePartsHorizontal() {
ASSERT_HOST(cps_super_bbox_);
std::vector<ColPartition *> new_seeds;
const int kMarginDiffTh = IntCastRounded(0.5 * lang_tesseract_->source_resolution());
const int kGapTh = static_cast<int>(round(1.0f * lang_tesseract_->source_resolution()));
const int kGapTh = static_cast<int>(std::round(1.0f * lang_tesseract_->source_resolution()));
ColPartitionGridSearch search(part_grid_);
search.SetUniqueMode(true);
// The center x coordinate of the cp_super_bbox_.
Expand Down Expand Up @@ -923,8 +924,8 @@ bool EquationDetect::IsInline(const bool search_bottom, const int textparts_line
// Check if neighbor and part is inline similar.
const float kHeightRatioTh = 0.5;
const int kYGapTh = textparts_linespacing > 0
? textparts_linespacing + static_cast<int>(round(0.02f * resolution_))
: static_cast<int>(round(0.05f * resolution_)); // Default value.
? textparts_linespacing + static_cast<int>(std::round(0.02f * resolution_))
: static_cast<int>(std::round(0.05f * resolution_)); // Default value.
if (part_box.x_overlap(neighbor_box) && // Location feature.
part_box.y_gap(neighbor_box) <= kYGapTh && // Line spacing.
// Geo feature.
Expand Down Expand Up @@ -978,9 +979,9 @@ EquationDetect::IndentType EquationDetect::IsIndented(ColPartition *part) {
ColPartitionGridSearch search(part_grid_);
ColPartition *neighbor = nullptr;
const TBOX &part_box(part->bounding_box());
const int kXGapTh = static_cast<int>(round(0.5f * resolution_));
const int kRadiusTh = static_cast<int>(round(3.0f * resolution_));
const int kYGapTh = static_cast<int>(round(0.5f * resolution_));
const int kXGapTh = static_cast<int>(std::round(0.5f * resolution_));
const int kRadiusTh = static_cast<int>(std::round(3.0f * resolution_));
const int kYGapTh = static_cast<int>(std::round(0.5f * resolution_));

// Here we use a simple approximation algorithm: from the center of part, we
// perform the radius search, and check if we can find a neighboring partition
Expand Down Expand Up @@ -1080,7 +1081,7 @@ void EquationDetect::ExpandSeedHorizontal(const bool search_left, ColPartition *
std::vector<ColPartition *> *parts_to_merge) {
ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr);
const float kYOverlapTh = 0.6;
const int kXGapTh = static_cast<int>(round(0.2f * resolution_));
const int kXGapTh = static_cast<int>(std::round(0.2f * resolution_));

ColPartitionGridSearch search(part_grid_);
const TBOX &seed_box(seed->bounding_box());
Expand Down Expand Up @@ -1132,7 +1133,7 @@ void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition *
std::vector<ColPartition *> *parts_to_merge) {
ASSERT_HOST(seed != nullptr && parts_to_merge != nullptr && cps_super_bbox_ != nullptr);
const float kXOverlapTh = 0.4;
const int kYGapTh = static_cast<int>(round(0.2f * resolution_));
const int kYGapTh = static_cast<int>(std::round(0.2f * resolution_));

ColPartitionGridSearch search(part_grid_);
const TBOX &seed_box(seed->bounding_box());
Expand Down Expand Up @@ -1210,8 +1211,8 @@ void EquationDetect::ExpandSeedVertical(const bool search_bottom, ColPartition *
}

bool EquationDetect::IsNearSmallNeighbor(const TBOX &seed_box, const TBOX &part_box) const {
const int kXGapTh = static_cast<int>(round(0.25f * resolution_));
const int kYGapTh = static_cast<int>(round(0.05f * resolution_));
const int kXGapTh = static_cast<int>(std::round(0.25f * resolution_));
const int kYGapTh = static_cast<int>(std::round(0.05f * resolution_));

// Check geometric feature.
if (part_box.height() > seed_box.height() || part_box.width() > seed_box.width()) {
Expand Down Expand Up @@ -1266,7 +1267,7 @@ void EquationDetect::ProcessMathBlockSatelliteParts() {
int med_height = text_box.height();
if (text_parts.size() % 2 == 0 && text_parts.size() > 1) {
const TBOX &text_box = text_parts[text_parts.size() / 2 - 1]->bounding_box();
med_height = static_cast<int>(round(0.5f * (text_box.height() + med_height)));
med_height = static_cast<int>(std::round(0.5f * (text_box.height() + med_height)));
}

// Iterate every text_parts and check if it is a math block satellite.
Expand Down Expand Up @@ -1348,7 +1349,7 @@ bool EquationDetect::IsMathBlockSatellite(ColPartition *part,
ColPartition *EquationDetect::SearchNNVertical(const bool search_bottom, const ColPartition *part) {
ASSERT_HOST(part);
ColPartition *nearest_neighbor = nullptr, *neighbor = nullptr;
const int kYGapTh = static_cast<int>(round(resolution_ * 0.5f));
const int kYGapTh = static_cast<int>(std::round(resolution_ * 0.5f));

ColPartitionGridSearch search(part_grid_);
search.SetUniqueMode(true);
Expand Down Expand Up @@ -1383,7 +1384,7 @@ bool EquationDetect::IsNearMathNeighbor(const int y_gap, const ColPartition *nei
if (!neighbor) {
return false;
}
const int kYGapTh = static_cast<int>(round(resolution_ * 0.1f));
const int kYGapTh = static_cast<int>(std::round(resolution_ * 0.1f));
return neighbor->type() == PT_EQUATION && y_gap <= kYGapTh;
}

Expand Down
3 changes: 2 additions & 1 deletion src/ccmain/fixxht.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include <algorithm>
#include <cctype>
#include <cmath>
#include <cstring>

namespace tesseract {
Expand Down Expand Up @@ -205,7 +206,7 @@ float Tesseract::ComputeCompatibleXheight(WERD_RES *word_res, float *baseline_sh
new_xht / word_res->denorm.y_scale());
}
// The xheight must change by at least x_ht_min_change to be used.
if (fabs(new_xht - kBlnXHeight) >= x_ht_min_change) {
if (std::fabs(new_xht - kBlnXHeight) >= x_ht_min_change) {
return new_xht / word_res->denorm.y_scale();
} else {
return bottom_shift != 0 ? word_res->x_height : 0.0f;
Expand Down
Loading

0 comments on commit d8d63fd

Please sign in to comment.